- /runtime/libs/misc
- /runtime/libs/ndarray
- /runtime/onert
+ - /runtime/service/npud/tests
- /tests/nnfw_api
testFile:
* text eol=lf
# Binary - ignore text file setting
-*.caffemodel -text
-*.png -text
-*.pdf -text
-*.h5 -text
-*.tar.gz -text
-*.tflite -text
-*.bmp -text
+*.bmp binary
+*.caffemodel binary
+*.h5 binary
+*.jar binary
+*.pdf binary
+*.png binary
+*.tar.gz binary
+*.tflite binary
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
- name: Setup python
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v4
with:
python-version: '3.x'
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
with:
# Fetch all history and branch (default: 1)
# Require all history to get file creation date
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
with:
# Checkout PR head commit
# Checkout Action use merge commit as default
--- /dev/null
+name: Build and deploy github page
+
+on:
+ schedule:
+ # Every week
+ - cron: '30 19 * * SUN'
+ workflow_dispatch:
+ inputs:
+ publish:
+ description: 'Push to github page branch or not'
+ required: true
+ default: false
+ type: boolean
+
+jobs:
+ build_and_deploy:
+ name: 'Deploy doxygen page'
+ runs-on: 'ubuntu-latest'
+ if: github.repository_owner == 'Samsung'
+
+ steps:
+ - name: 'Checkout'
+ uses: actions/checkout@v3
+ - name: 'Generate HTML'
+ uses: mattnotmitt/doxygen-action@v1.9
+ with:
+ doxyfile-path: 'infra/doxygen/Doxyfile'
+ - name: 'Tar artifact'
+ run: tar -zcf doxygen.tar.gz -C doxygen/html ./
+ - name: 'Generate artifact'
+ uses: actions/upload-artifact@v3
+ with:
+ name: doxygen-html
+ path: doxygen.tar.gz
+ - name: 'Deploy'
+ if: ${{ github.event_name == 'schedule' || github.event.inputs.publish == 'true' }}
+ uses: JamesIves/github-pages-deploy-action@v4
+ with:
+ folder: doxygen/html
+ branch: gh-pages
+#!/usr/bin/make -f
+
HOST_ARCH?=$(shell uname -m)
TARGET_ARCH?=$(shell uname -m)
BUILD_TYPE?=Debug
HOST_OS?=linux
TARGET_OS?=linux
COVERAGE_BUILD?=0
-BENCHMARK_ACL_BUILD?=0
OPTIONS?=
# make TARGET and TYPE to lowercase
TARGET_ARCH_LC=aarch64
endif
ifneq (,$(findstring android,$(TARGET_OS)))
- # Anndroid only allow aarch64 target-arch
+ # Android only allow aarch64 target-arch
TARGET_ARCH_LC=aarch64
- TARGET_OS=android
endif
# Set CROSS_BUILD=1 when ROOTFS_DIR is given, and TARGET_ARCH is different to HOST_ARCH.
ifneq ($(ROOTFS_DIR),)
OPTIONS+= -DCMAKE_TOOLCHAIN_FILE=$(TOOLCHAIN_FILE)
endif
-ifeq ($(COVERAGE_BUILD),1)
+ifneq ($(filter create-covsuite,$(MAKECMDGOALS)),)
OPTIONS+= -DENABLE_COVERAGE=ON
else
- OPTIONS+= -DENABLE_COVERAGE=OFF
-endif
-
-ifeq ($(BENCHMARK_ACL_BUILD),1)
- OPTIONS+= -DBUILD_BENCHMARK_ACL=1
-endif
-
-ifneq ($(EXT_HDF5_DIR),)
- $(info Hello $(EXT_HDF5_DIR))
- OPTIONS+= -DEXT_HDF5_DIR=$(EXT_HDF5_DIR)
+ ifeq ($(COVERAGE_BUILD),1)
+ OPTIONS+= -DENABLE_COVERAGE=ON
+ else
+ OPTIONS+= -DENABLE_COVERAGE=OFF
+ endif
endif
ifneq ($(EXTERNAL_VOLUME),)
WORKFOLDER=$(TARGET_ARCH_LC)-$(TARGET_OS).$(BUILD_TYPE_LC)
WORKSPACE=$(WORKHOME)/$(WORKFOLDER)
-BUILD_FOLDER=$(WORKSPACE)/obj
INSTALL_PATH?=$(WORKSPACE)/out
OVERLAY_FOLDER?=$(WORKSPACE)/overlay
-BUILD_ALIAS=$(WORKHOME)/obj
INSTALL_ALIAS=$(WORKHOME)/out
TIMESTAMP_CONFIGURE=$(WORKSPACE)/CONFIGURE
TIMESTAMP_BUILD=$(WORKSPACE)/BUILD
TIMESTAMP_INSTALL=$(WORKSPACE)/INSTALL
-all: build
+###
+### Common environment variable
+###
+export NNFW_WORKSPACE=$(WORKSPACE)
+
+###
+### Default target
+###
+all: install
###
### Command (public)
build: build_internal
-install: $(TIMESTAMP_INSTALL)
+install: install_all_internal
-create_package: runtime_tar_internal
+create-package: runtime_tar_internal
-create_acl_tar: acl_tar_internal
+create-aclpack: acl_tar_internal
+
+create-testsuite: test_suite_internal
+
+create-covsuite: coverage_suite_internal
clean:
rm -rf $(WORKSPACE)
distclean:
- rm -rf $(WORKSPACE)
- rm -rf externals/*.stamp
+ rm -rf Product
+ rm -rf externals
rm -rf tests/nnapi/src/generated/
+# create_package, create_acl_tar: to be removed
+create_package: runtime_tar_internal
+create_acl_tar: acl_tar_internal
+
###
### Command (internal)
###
-configure_internal:
-# TODO Remove setting EXT_ACL_FOLDER
-# Construct overlay folder directly outside (with headers?)
-ifneq ($(EXT_ACL_FOLDER),)
- mkdir -p $(OVERLAY_FOLDER)/lib
- cp $(EXT_ACL_FOLDER)/* $(OVERLAY_FOLDER)/lib
-# Make stamp file
- printf "21.02" > $(OVERLAY_FOLDER)/ARMCOMPUTE.stamp
-endif
+$(WORKSPACE):
+ mkdir -p $@
+configure_internal: $(WORKSPACE)
ifneq ($(DEBIAN_BUILD),)
test -d externals || mkdir -p externals
find packaging/ -type f -name "*.tar.gz" | xargs -i tar xf {} -C externals
endif
-
- NNFW_WORKSPACE="$(WORKSPACE)" NNFW_INSTALL_PREFIX=$(INSTALL_PATH) ./nnfw configure \
+ NNFW_INSTALL_PREFIX=$(INSTALL_PATH) ./nnfw configure \
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE_LC) \
-DNNFW_OVERLAY_DIR=$(OVERLAY_FOLDER) \
-DEXTERNALS_BUILD_THREADS=$(NPROCS) \
$(OPTIONS)
- touch $(TIMESTAMP_CONFIGURE)
-build_internal: $(BUILD_FOLDER)
- NNFW_WORKSPACE="$(WORKSPACE)" ./nnfw build -j $(NPROCS)
- rm -rf $(BUILD_ALIAS)
- ln -s $(BUILD_FOLDER) $(BUILD_ALIAS)
- touch $(TIMESTAMP_BUILD)
+build_internal: configure_internal
+ ./nnfw build -j $(NPROCS)
-install_internal:
- NNFW_WORKSPACE="$(WORKSPACE)" ./nnfw install
+install_internal: build_internal
+ ./nnfw install
rm -rf $(INSTALL_ALIAS)
ln -s $(INSTALL_PATH) $(INSTALL_ALIAS)
- touch $(TIMESTAMP_INSTALL)
-runtime_tar_internal: $(TIMESTAMP_BUILD) install_internal
+runtime_tar_internal: build_internal install_internal
tar -zcf $(WORKSPACE)/onert-package.tar.gz -C $(INSTALL_PATH) lib
tar -zcf $(WORKSPACE)/onert-devel-package.tar.gz -C $(INSTALL_PATH) include/nnfw
tar -zcf $(WORKSPACE)/onert-plugin-devel-package.tar.gz -C $(INSTALL_PATH) include/onert
tar -zcf $(WORKSPACE)/onert-test-package.tar.gz -C $(INSTALL_PATH) $(shell ls $(INSTALL_PATH) -I lib -I include)
-acl_tar_internal: $(BUILD_FOLDER)
+acl_tar_internal: configure_internal
tar -zcf $(WORKSPACE)/onert-acl.tar.gz -C ${OVERLAY_FOLDER} lib/libarm_compute.so lib/libarm_compute_core.so lib/libarm_compute_graph.so
-install_internal_acl:
+# Must run after install_internal: the copy below targets $(INSTALL_ALIAS)/lib,
+# and install_internal removes and re-creates the $(INSTALL_ALIAS) link.
+install_acl_internal: install_internal
# Workaround to install acl for test (ignore error when there is no file to copy)
cp $(OVERLAY_FOLDER)/lib/libarm_compute*.so $(INSTALL_ALIAS)/lib || true
-build_test_suite: install_internal install_internal_acl
+install_all_internal: install_internal install_acl_internal
+
+test_suite_internal: install_all_internal
@echo "packaging test suite"
@rm -rf $(INSTALL_PATH)/test-suite.tar.gz
# TODO Divide runtime package, external library package, and test suite
@tar -zcf test-suite.tar.gz tests/scripts infra Product/out --dereference
@mv test-suite.tar.gz $(INSTALL_PATH)/.
-build_coverage_suite: install_internal install_internal_acl
+coverage_suite_internal: install_all_internal
@echo "packaging test-coverage suite"
@rm -rf $(INSTALL_PATH)/coverage-suite.tar.gz
@find Product -name "*.gcno" > include_lists.txt
@tar -zcf coverage-suite.tar.gz tests/scripts infra Product/out --dereference -T include_lists.txt
@rm -rf include_lists.txt tests/scripts/build_path_depth.txt
@mv coverage-suite.tar.gz $(INSTALL_PATH)/.
-
-###
-### Timestamps
-###
-$(WORKSPACE):
- mkdir -p $@
-
-$(BUILD_FOLDER): $(WORKSPACE) configure_internal
-
-$(TIMESTAMP_CONFIGURE): configure_internal
-
-$(TIMESTAMP_BUILD): $(TIMESTAMP_CONFIGURE) build_internal
-
-$(TIMESTAMP_INSTALL): $(TIMESTAMP_BUILD) install_internal install_internal_acl
+# get CODENAME to perform per codename actions
+# set focal as default
+set(ONE_UBUNTU_CODENAME "focal")
+find_program(LSB_RELEASE_EXEC lsb_release)
+if(LSB_RELEASE_EXEC)
+  # output should be one of 'bionic', 'focal', 'jammy'
+  # others are not tested
+  execute_process(COMMAND "${LSB_RELEASE_EXEC}" --short --codename
+                  OUTPUT_VARIABLE ONE_UBUNTU_CODENAME
+                  OUTPUT_STRIP_TRAILING_WHITESPACE)
+  # execute_process() stores whatever was printed, so an lsb_release that prints
+  # nothing would wipe the default set above; restore it in that case.
+  if("${ONE_UBUNTU_CODENAME}" STREQUAL "")
+    set(ONE_UBUNTU_CODENAME "focal")
+  endif()
+else()
+  message(STATUS "WARNING: lsb_release not found")
+endif()
+
+# The expansion is quoted so that if() always receives a left-hand operand,
+# even when the codename is empty.
+if("${ONE_UBUNTU_CODENAME}" STREQUAL "jammy")
+  set(ONE_UBUNTU_CODENAME_JAMMY TRUE)
+endif()
+
# TODO Validate the argument of "requires"
function(get_project_build_order VAR)
# This file will describe the dependencies among projects
for (const auto node : nodes)
{
const auto input_node = loco::must_cast<const luci::CircleInput *>(node);
+ const auto dtype_size = loco::size(input_node->dtype());
size_t element_size = 1;
for (uint32_t index = 0; index < input_node->rank(); index++)
element_size *= input_node->dim(index).value();
}
- vec.push_back(element_size);
+ vec.push_back(element_size * dtype_size);
}
return vec;
target_link_libraries(circle_execution_plan luci_export)
target_link_libraries(circle_execution_plan luci_plan)
target_link_libraries(circle_execution_plan arser)
+target_link_libraries(circle_execution_plan luci_log)
target_include_directories(circle_execution_plan PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/pal")
install(TARGETS circle_execution_plan DESTINATION bin)
CMSISNN
};
+// Runtime the execution plan is generated for
+enum RuntimeType
+{
+  ONERT_MICRO = 0,
+  LUCI_INTERPRETER = 1
+};
+
+// Buffer layout used while planning allocations
+enum AllocatingMode
+{
+  COMMON = 0, // a single buffer is considered for all allocations
+  SPLIT = 1   // three buffers: for input, for output and for intermediate tensors
+};
+
struct TargetPlatform
{
SupportedPlatformType platform_type;
arser.add_argument("input").help("Input circle model");
arser.add_argument("output").help("Output circle model");
arser.add_argument("--platform").default_value("linux").help("Platform name: linux mcu cmsisnn");
+ // Buffer layout option; the parsed value is validated later against "common" / "split".
+ // Adjacent string literals are concatenated verbatim, so every piece of the help text
+ // carries its own separator.
+ arser.add_argument("--allocating_mode")
+   .default_value("common")
+   .help("Buffer type name (only onert-micro option): "
+         "common - a single buffer is considered for all allocations; "
+         "split - there are three buffers: for input,"
+         " for output and for intermediate tensors");
+ // Target runtime option. The default must be one of the names accepted by the
+ // runtime-name validation below ("onert-micro" or "luci-interpreter"); otherwise
+ // running the tool without --runtime is rejected as an invalid runtime name.
+ arser.add_argument("--runtime")
+   .default_value("onert-micro")
+   .help("Target runtime name: luci-interpreter onert-micro");
+ arser.add_argument("--allocate_const")
+ .nargs(1)
+ .type(arser::DataType::BOOL)
+ .required(false)
+ .default_value(false)
+ .help("Whether or not to take into account constants in memory allocation. "
+ "Default value - false, constants are not counted when allocating memory");
+ arser.add_argument("--allocate_input")
+ .nargs(1)
+ .type(arser::DataType::BOOL)
+ .required(false)
+ .default_value(true)
+ .help("Whether or not to take into account inputs in memory allocation. "
+ "Default value - true, inputs are counted when allocating memory");
arser.add_argument("--use_dsp")
.nargs(1)
.type(arser::DataType::BOOL)
const std::string input_path = arser.get<std::string>("input");
const std::string output_path = arser.get<std::string>("output");
const std::string platform_name = arser.get<std::string>("--platform");
+ const std::string allocating_mode_name = arser.get<std::string>("--allocating_mode");
+ const std::string runtime_name = arser.get<std::string>("--runtime");
const bool use_dsp = arser.get<bool>("--use_dsp");
+ const bool is_allocate_const = arser.get<bool>("--allocate_const");
+ const bool is_allocate_input = arser.get<bool>("--allocate_input");
const std::string json_path = arser.get<std::string>("--save_allocations");
if (platform_name != "cmsisnn" && use_dsp)
return EXIT_FAILURE;
}
+ circle_planner::AllocatingMode allocating_mode;
+ if (allocating_mode_name == "split")
+ {
+ allocating_mode = circle_planner::AllocatingMode::SPLIT;
+ }
+ else if (allocating_mode_name == "common")
+ {
+ allocating_mode = circle_planner::AllocatingMode::COMMON;
+ }
+ else
+ {
+ std::cerr << "ERROR: Invalid allocation mode name '" << allocating_mode_name << "'"
+ << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ circle_planner::RuntimeType runtime_type;
+ if (runtime_name == "onert-micro")
+ {
+ runtime_type = circle_planner::RuntimeType::ONERT_MICRO;
+ }
+ else if (runtime_name == "luci-interpreter")
+ {
+ runtime_type = circle_planner::RuntimeType::LUCI_INTERPRETER;
+ }
+ else
+ {
+ std::cerr << "ERROR: Invalid runtime name '" << runtime_name << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ if (allocating_mode == circle_planner::AllocatingMode::SPLIT and
+ runtime_type == circle_planner::RuntimeType::LUCI_INTERPRETER)
+ {
+ std::cerr << "Split buffer type can only be used with onert-micro runtime" << std::endl;
+ return EXIT_FAILURE;
+ }
+
bool is_save_allocations = false;
if (!json_path.empty())
auto module = importer.importModule(circle_model);
// Do main job
- circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp});
+ circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp},
+ runtime_type, allocating_mode);
+ execution_planner.change_planning_mode(is_allocate_const, is_allocate_input, true);
execution_planner.make_execution_plan();
if (is_save_allocations)
#include "ExecutionPlanner.h"
#include <loco/IR/Algorithm.h>
#include <luci/UserSettings.h>
+#include <luci/Log.h>
#include <json.h>
#include <fstream>
+#include <limits> // std::numeric_limits
+
namespace circle_planner
{
namespace
allocations_node.append(allocation_node);
}
+// TODO: Introduce inplace optimization
+// Returns true when the opcode of `node` is LOGISTIC, RESHAPE or EXPAND_DIMS
+bool can_be_inplace_optimization_node(luci::CircleNode *node)
+{
+  const auto opcode = node->opcode();
+  return opcode == luci::CircleOpcode::LOGISTIC or opcode == luci::CircleOpcode::RESHAPE or
+         opcode == luci::CircleOpcode::EXPAND_DIMS;
+}
+
} // namespace
-void ExecutionPlanner::make_execution_plan()
+// Dispatches onert-micro planning according to the configured allocating mode
+void ExecutionPlanner::make_execution_plan_onert_micro_base()
+{
+  if (_allocating_mode == AllocatingMode::COMMON)
+  {
+    make_execution_plan_onert_micro_common_buffer();
+    return;
+  }
+
+  if (_allocating_mode == AllocatingMode::SPLIT)
+  {
+    make_execution_plan_onert_micro_split_buffer();
+    return;
+  }
+
+  // Any other value is not a known allocating mode
+  throw std::runtime_error("Unsupported buffer type\n");
+}
+
+// Computes offsets for the nodes currently held in _ordered_nodes and attaches an
+// execution plan to each of them, skipping CIRCLECONST and CIRCLEOUTPUTEXCLUDE nodes.
+//
+// order_offset: value added to the running counter of planned nodes to form the
+//               execution order stored in each plan
+void ExecutionPlanner::write_execution_plan(uint32_t order_offset)
+{
+  _required_size = get_offsets_with_greedy_by_size();
+
+  uint32_t counter_ops = 0;
+  for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+  {
+    // Cast once with loco::must_cast (already used for these nodes in this file)
+    // instead of dereferencing the result of an unchecked dynamic_cast.
+    const auto circle_node = loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]);
+    const auto opcode = circle_node->opcode();
+    if (opcode == luci::CircleOpcode::CIRCLECONST or
+        opcode == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+      continue;
+
+    luci::CircleNodeExecutionPlan execution_plan(counter_ops + order_offset, _offsets[i]);
+    luci::add_execution_plan(circle_node, execution_plan);
+    counter_ops++;
+  }
+}
+
+// Plans three buffers one after another: graph inputs, graph outputs, then the
+// remaining nodes. Planning state is cleared between buffers.
+void ExecutionPlanner::make_execution_plan_onert_micro_split_buffer()
+{
+  LOGGER(l);
+
+  const auto input_size = _graph->inputs()->size();
+  const auto output_size = _graph->outputs()->size();
+
+  // Clear structures for next buffer
+  const auto reset_planning_state = [this]() {
+    _ordered_nodes.clear();
+    _alloc_node_inform_vector.clear();
+    _dealloc_node.clear();
+    _alloc_node.clear();
+    _offsets.clear();
+    _required_size = 0;
+  };
+
+  // Make execution plan for inputs
+  _ordered_nodes = loco::input_nodes(_graph);
+  write_execution_plan(0);
+  dump_inform();
+  VERBOSE(l, 0) << "Input graph buffer required memory = " << _required_size << std::endl;
+
+  reset_planning_state();
+
+  // Make execution plan for outputs
+  _ordered_nodes = loco::output_nodes(_graph);
+  write_execution_plan(input_size);
+  dump_inform();
+  VERBOSE(l, 0) << "Output graph buffer required memory = " << _required_size << std::endl;
+
+  reset_planning_state();
+
+  // Make execution plan for intermediates calculations
+  get_default_execution_order_plan_without_inputs_and_outputs();
+  write_execution_plan(input_size + output_size);
+  dump_inform();
+  VERBOSE(l, 0) << "Main graph buffer required memory = " << _required_size << std::endl;
+}
+
+// Plans a single buffer for the whole graph and writes execution plans in this order:
+// graph inputs first, then the nodes feeding the graph outputs, then every other node
+// except CIRCLECONST and CIRCLEOUTPUTEXCLUDE.
+void ExecutionPlanner::make_execution_plan_onert_micro_common_buffer()
+{
+  LOGGER(l);
+
+  get_default_execution_order_plan();
+  _required_size = get_offsets_with_greedy_by_size();
+
+  // Find prev nodes for output nodes (actual graph output node, not luci::CircleOutput)
+  const auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
+  std::vector<loco::Node *> output_prev_nodes;
+  for (const auto output_node : output_nodes)
+  {
+    const auto prev_nodes = loco::preds(output_node);
+    std::copy(prev_nodes.begin(), prev_nodes.end(), std::back_inserter(output_prev_nodes));
+  }
+  const auto output_nodes_size = output_prev_nodes.size();
+
+  const auto inputs_nodes = loco::input_nodes(_graph);
+  const auto input_nodes_size = inputs_nodes.size();
+
+  int32_t counter_ops = 0;
+  for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+  {
+    // Cast once with loco::must_cast (already used for these nodes in this file)
+    // instead of dereferencing the result of an unchecked dynamic_cast.
+    const auto circle_node = loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]);
+
+    // First write to input nodes
+    if (circle_node->opcode() == luci::CircleOpcode::CIRCLEINPUT)
+    {
+      // Find input_position for proper position in execution order
+      const auto input_position = std::distance(
+        inputs_nodes.begin(), std::find(inputs_nodes.begin(), inputs_nodes.end(), circle_node));
+      luci::CircleNodeExecutionPlan execution_plan(input_position, _offsets[i]);
+      luci::add_execution_plan(circle_node, execution_plan);
+      continue;
+    }
+
+    // Second write to actual output nodes (not luci::CircleOutput)
+    // A single lookup serves both the membership test and the position.
+    const auto output_it =
+      std::find(output_prev_nodes.begin(), output_prev_nodes.end(), circle_node);
+    if (output_it != output_prev_nodes.end())
+    {
+      // Find output_position for proper position in execution order
+      const auto output_position = std::distance(output_prev_nodes.begin(), output_it);
+      luci::CircleNodeExecutionPlan execution_plan(input_nodes_size + output_position, _offsets[i]);
+      luci::add_execution_plan(circle_node, execution_plan);
+      continue;
+    }
+
+    // Finally write to all intermediate nodes
+    if (circle_node->opcode() != luci::CircleOpcode::CIRCLECONST and
+        circle_node->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+    {
+      luci::CircleNodeExecutionPlan execution_plan(
+        counter_ops + input_nodes_size + output_nodes_size, _offsets[i]);
+      luci::add_execution_plan(circle_node, execution_plan);
+      counter_ops++;
+    }
+  }
+
+  dump_inform();
+  VERBOSE(l, 0) << "Buffer required memory = " << _required_size << std::endl;
+}
+
+void ExecutionPlanner::make_execution_plan_luci_interpreter()
{
+ LOGGER(l);
+
get_default_execution_order_plan();
_required_size = get_offsets_with_greedy_by_size();
for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]),
execution_plan);
}
+
+ VERBOSE(l, 0) << "Buffer required memory = " << _required_size << std::endl;
+ dump_inform();
+}
+
+void ExecutionPlanner::make_execution_plan()
+{
+ switch (_runtime_type)
+ {
+ case ONERT_MICRO:
+ make_execution_plan_onert_micro_base();
+ break;
+ case LUCI_INTERPRETER:
+ make_execution_plan_luci_interpreter();
+ break;
+ default:
+ throw std::runtime_error("Unsupported runtime platform\n");
+ }
+
auto settings = luci::UserSettings::settings();
settings->set(luci::UserSettings::Key::ExecutionPlanGen, true);
}
_ordered_nodes = loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph)));
}
+// Fills _ordered_nodes with the post-order traversal from the graph outputs, then removes
+// CIRCLEINPUT nodes, CIRCLEOUTPUT nodes and the nodes that directly feed the graph outputs.
+void ExecutionPlanner::get_default_execution_order_plan_without_inputs_and_outputs()
+{
+  // Get all nodes
+  _ordered_nodes = loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph)));
+
+  // Get real output nodes (not luci::CircleOutput)
+  const auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
+  std::vector<loco::Node *> output_prev_nodes;
+  for (const auto output_node : output_nodes)
+  {
+    const auto prev_nodes = loco::preds(output_node);
+    std::copy(prev_nodes.begin(), prev_nodes.end(), std::back_inserter(output_prev_nodes));
+  }
+
+  // Remove input and real output nodes from _ordered_nodes
+  _ordered_nodes.erase(
+    std::remove_if(_ordered_nodes.begin(), _ordered_nodes.end(),
+                   [&output_prev_nodes](auto node) {
+                     // Use loco::must_cast (already used for these nodes in this file)
+                     // instead of dereferencing an unchecked dynamic_cast result.
+                     const auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+                     const auto opcode = circle_node->opcode();
+
+                     return opcode == luci::CircleOpcode::CIRCLEINPUT or
+                            opcode == luci::CircleOpcode::CIRCLEOUTPUT or
+                            std::find(output_prev_nodes.begin(), output_prev_nodes.end(), node) !=
+                              output_prev_nodes.end();
+                   }),
+    _ordered_nodes.end());
+}
+
void ExecutionPlanner::get_usage_interval()
{
// Initialize vectors of first and last nodes for usage interval
for (auto &output_node : output_nodes(_graph))
{
auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), output_node);
+ if (it == _ordered_nodes.end())
+ continue;
size_t index = std::distance(_ordered_nodes.begin(), it);
usages_counts[index]++;
}
for (auto &input_node : input_nodes(_graph))
{
auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), input_node);
+ if (it == _ordered_nodes.end())
+ continue;
size_t index = std::distance(_ordered_nodes.begin(), it);
usages_counts[index]++;
allocate(0, index);
uint32_t ExecutionPlanner::greedy_by_size_approach()
{
size_t result_size = 0;
- create_alloc_node_inform_vector(_is_null_consts, _is_null_inputs, _is_null_scratchpads);
+ create_alloc_node_inform_vector();
std::vector<AllocationNodeInformation> ordered_alloc_inform;
for (auto ¤t_node : _alloc_node_inform_vector)
{
return result_size;
}
-void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool null_inputs,
- bool null_scratchpad)
+void ExecutionPlanner::create_alloc_node_inform_vector()
{
auto node_compare = [this](const AllocationNodeInformation &alloc_1,
const AllocationNodeInformation &alloc_2) {
_alloc_node_inform_vector[i].last_node = _dealloc_node[i];
const auto *const_node = dynamic_cast<const luci::CircleConst *>(circle_node);
- if (i == 0 && null_inputs)
+ if (circle_node->opcode() == luci::CircleOpcode::CIRCLEINPUT && not _is_allocate_inputs)
+ {
+ _alloc_node_inform_vector[i].size = 0;
+ }
+ else if (circle_node->opcode() == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
{
_alloc_node_inform_vector[i].size = 0;
}
- else if (const_node && null_consts)
+ else if (const_node && not _is_allocate_consts)
{
_alloc_node_inform_vector[i].size = 0;
}
// Scratchpad If needed
std::vector<uint32_t> scratchpad_sizes;
- if (!null_scratchpad)
+ if (_is_allocate_scratchpads)
{
switch (circle_node->opcode())
{
void ExecutionPlanner::dump_inform()
{
+ LOGGER(l);
uint32_t max_breadth = 0;
for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
}
auto node = loco::must_cast<luci::CircleNode *>(_ordered_nodes.at(i));
- printf("node_num = %d node_name = %s node_size = %d node_offset = %d node_breadth = "
- "%u node_first_node = %d node_last_node = %d\n",
- i, node->name().c_str(), current_node_it->size, current_node_it->offset,
- current_node_it->breadth, current_node_it->first_node, current_node_it->last_node);
+ VERBOSE(l, 0) << "node_num = " << i << " node_name = " << node->name().c_str()
+ << " node_size = " << current_node_it->size
+ << " node_offset = " << current_node_it->offset
+ << " node_breadth = " << current_node_it->breadth
+ << " node_first_node = " << current_node_it->first_node
+ << " node_last_node = " << current_node_it->last_node << std::endl;
}
- printf("Lower bound is = %u\n", max_breadth);
+ VERBOSE(l, 0) << "Lower bound = " << max_breadth << std::endl;
std::sort(_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
[](const AllocationNodeInformation &first, const AllocationNodeInformation &second) {
if (first.breadth != second.breadth)
_scratchpad_helper = std::make_unique<ScratchpadHelperLinux>();
}
- explicit ExecutionPlanner(loco::Graph *graph, TargetPlatform target_platform) : _graph(graph)
+ explicit ExecutionPlanner(loco::Graph *graph, TargetPlatform target_platform,
+ RuntimeType runtime_type, AllocatingMode allocating_mode)
+ : _graph(graph), _runtime_type(runtime_type), _allocating_mode(allocating_mode)
{
switch (target_platform.platform_type)
{
void make_execution_plan();
// Method change planning mode:
- // is_null_consts = true - constants are no longer taken into account when planning
- // is_null_inputs = true - input are no longer taken into account when planning
- // is_null_scratchpads = true - scratchpads are no longer taken into account when planning
- void change_planning_mode(bool is_null_consts, bool is_null_inputs, bool is_null_scratchpads)
+ // is_allocate_consts = false - constants are no longer taken into account when planning
+ // is_allocate_inputs = false - input are no longer taken into account when planning
+ // is_allocate_scratchpads = false - scratchpads are no longer taken into account when planning
+ void change_planning_mode(bool is_allocate_consts, bool is_allocate_inputs,
+ bool is_allocate_scratchpads)
{
- _is_null_consts = is_null_consts;
- _is_null_inputs = is_null_inputs;
- _is_null_scratchpads = is_null_scratchpads;
+ _is_allocate_consts = is_allocate_consts;
+ _is_allocate_inputs = is_allocate_inputs;
+ _is_allocate_scratchpads = is_allocate_scratchpads;
};
void create_json_allocation_file(const std::string &json_path);
private:
+ // Save execution plan for onert-micro runtime base function.
+ //
+ // NOTE: First, according to ordered_node, the input nodes are written,
+ // then all outputs, finally all nodes in execution order.
+ // Constants are not written.
+ void make_execution_plan_onert_micro_base();
+
+ // Save execution plan for luci-interpreter runtime base function.
+ void make_execution_plan_luci_interpreter();
+
+ // Save execution plan for onert-micro runtime for common buffer type.
+ void make_execution_plan_onert_micro_common_buffer();
+
+ // Save execution plan for onert-micro runtime for split buffer type.
+ void make_execution_plan_onert_micro_split_buffer();
+
// Method gets default execution order plan and saves it in _ordered_nodes vector.
// There can be different variants of execution order and this method provides main one.
void get_default_execution_order_plan();
+ // Method gets default execution order plan,
+ // but without inputs and output nodes and saves it in _ordered_nodes vector
+ void get_default_execution_order_plan_without_inputs_and_outputs();
+
// Method provides nodes with usage interval information.
void get_usage_interval();
// Method dumps execution plan information.
void dump_inform();
+ void write_execution_plan(uint32_t order_offset);
+
// Method finds required offsets for all nodes from _ordered_nodes, using greedy by size approach.
// It saves offsets in _offsets vector.
// Return: required size of buffer.
uint32_t greedy_by_size_approach();
// Method creates and fills _alloc_node_inform_vector with usage interval inform and node's sizes.
- // null_consts = true - size of const nodes will be equal 0;
- // null_inputs = true - size of input nodes will be equal 0;
- // null_scratchpad = true - size of scratchpad nodes will be equal 0;
+ // _is_allocate_consts = false - size of const nodes will be equal 0;
+ // _is_allocate_inputs = false - size of input nodes will be equal 0;
+ // _is_allocate_scratchpads = false - size of scratchpad nodes will be equal 0;
// It using if we don't want to take input(const or scratchpads) nodes into account
// when determining offsets and calculating the required buffer size. This is uses for
// experiments.
- void create_alloc_node_inform_vector(bool null_consts = false, bool null_inputs = false,
- bool null_scratchpad = false);
+ void create_alloc_node_inform_vector();
// Stores allocation additional information for the all nodes from _graph.
std::vector<AllocationNodeInformation> _alloc_node_inform_vector;
// Calculate size of scratchpad tensors for current platform
std::unique_ptr<IScratchpadHelper> _scratchpad_helper;
+ // Supported runtime type
+ RuntimeType _runtime_type;
+
+ // Supported buffers type
+ AllocatingMode _allocating_mode;
+
// Required memory size.
uint32_t _required_size = 0;
// Flags for choosing different planning modes:
- // _is_null_consts = true - constants are no longer taken into account when planning
- // _is_null_inputs = true - input are no longer taken into account when planning
- // _is_null_scratchpads = true - scratchpads are no longer taken into account when planning
- bool _is_null_consts = false;
- bool _is_null_inputs = false;
- bool _is_null_scratchpads = false;
+ // _is_allocate_consts = false - constants are no longer taken into account when planning
+ // _is_allocate_inputs = false - input are no longer taken into account when planning
+ // _is_allocate_scratchpads = false - scratchpads are no longer taken into account when planning
+ bool _is_allocate_consts = true;
+ bool _is_allocate_inputs = true;
+ bool _is_allocate_scratchpads = true;
};
} // namespace circle_planner
arser.add_argument("--conv2d_weight")
.nargs(0)
.help("Dump Conv2D series weight operators in circle file");
+ arser.add_argument("--constants").nargs(0).help("Dump constant tensors name");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file");
arser.add_argument("--tensor_dtype").nargs(0).help("Dump dtype of tensors");
arser.add_argument("circle").help("Circle file to inspect");
}
if (!arser["--operators"] && !arser["--conv2d_weight"] && !arser["--op_version"] &&
- !arser["--tensor_dtype"])
+ !arser["--tensor_dtype"] && !arser["--constants"])
{
std::cout << "At least one option must be specified" << std::endl;
std::cout << arser;
dumps.push_back(std::make_unique<circleinspect::DumpOperatorVersion>());
if (arser["--tensor_dtype"])
dumps.push_back(std::make_unique<circleinspect::DumpTensorDType>());
+ if (arser["--constants"])
+ dumps.push_back(std::make_unique<circleinspect::DumpConstants>());
std::string model_file = arser.get<std::string>("circle");
}
} // namespace circleinspect
+
+namespace circleinspect
+{
+
+// Writes to `os`, one per line, the name of every tensor that is not a variable and
+// for which the reader reports a non-zero buffer size, across all subgraphs of `model`.
+void DumpConstants::run(std::ostream &os, const circle::Model *model)
+{
+  mio::circle::Reader reader(model);
+
+  const uint32_t num_subgraphs = reader.num_subgraph();
+
+  for (uint32_t sg = 0; sg < num_subgraphs; sg++)
+  {
+    reader.select_subgraph(sg);
+    auto tensors = reader.tensors();
+
+    for (uint32_t idx = 0; idx < tensors->Length(); ++idx)
+    {
+      const auto tensor = tensors->Get(idx);
+
+      // Short-circuit: the buffer is only queried for non-variable tensors
+      const bool has_constant_data =
+        not tensor->is_variable() and reader.buffer_info(tensor->buffer(), nullptr) != 0;
+
+      if (has_constant_data)
+        os << reader.tensor_name(tensor) << std::endl;
+    }
+  }
+}
+
+} // namespace circleinspect
void run(std::ostream &os, const circle::Model *model);
};
+// Dumper selected by the "--constants" option
+class DumpConstants final : public DumpInterface
+{
+public:
+  DumpConstants() = default;
+
+  // Writes the dump for `model` to `os`
+  void run(std::ostream &os, const circle::Model *model);
+};
+
} // namespace circleinspect
#endif // __DUMP_H__
--- /dev/null
+# This directory only contributes tests; nothing to configure when tests are disabled.
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Build directories of the targets the test depends on at run time.
+get_target_property(ARTIFACTS_PATH testDataGenerator BINARY_DIR)
+get_target_property(CIRCLE_INTERPRETER_PATH circle-interpreter BINARY_DIR)
+# Turn the directory into the path of the executable itself.
+set(CIRCLE_INTERPRETER_PATH "${CIRCLE_INTERPRETER_PATH}/circle-interpreter")
+
+nnas_find_package(GTest REQUIRED)
+
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+GTest_AddTest(circle-interpreter-test ${TESTS})
+
+# The test binary reads both locations from its environment (see std::getenv in the test source).
+set_tests_properties(circle-interpreter-test
+ PROPERTIES
+ ENVIRONMENT "ARTIFACTS_PATH=${ARTIFACTS_PATH};CIRCLE_INTERPRETER_PATH=${CIRCLE_INTERPRETER_PATH}"
+ )
--- /dev/null
+# circle-interpreter-test
+
+`circle-interpreter-test` checks if _circle-interpreter_ is working as expected.
+
+Current tests include
+- input arguments test
+- output data test
+- printing help message test
+- validation of arguments and error message test
--- /dev/null
+require("common-artifacts")
+require("circle-interpreter")
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <vector>
+
+#define READSIZE 4096
+
+/**
+ * @brief Fixture that launches the circle-interpreter executable as a child process
+ *        and inspects its console output and generated files
+ */
+class circle_interpreter_test : public ::testing::Test
+{
+protected:
+ // Reads ARTIFACTS_PATH and CIRCLE_INTERPRETER_PATH from the environment; false if either is unset
+ bool initialize(void);
+ // Runs command through popen with stderr merged into stdout and stores the output in _result
+ bool run(const std::string &command);
+
+protected:
+ // Byte-wise comparison of two files; false if either cannot be opened or contents differ
+ bool compare(const std::string &file1, const std::string &file2);
+
+protected:
+ std::string _artifacts_path;          // value of ARTIFACTS_PATH
+ std::string _circle_interpreter_path; // value of CIRCLE_INTERPRETER_PATH
+ std::string _result;                  // captured output of the last successful run()
+};
+
+/**
+ * @brief Load the artifact directory and interpreter path from the environment
+ * @return false (after reporting on stderr) when a required variable is not set
+ */
+bool circle_interpreter_test::initialize(void)
+{
+  const char *artifacts_env = std::getenv("ARTIFACTS_PATH");
+  if (artifacts_env == nullptr)
+  {
+    std::cerr << "ARTIFACTS_PATH not found" << std::endl;
+    return false;
+  }
+  _artifacts_path = artifacts_env;
+
+  const char *interpreter_env = std::getenv("CIRCLE_INTERPRETER_PATH");
+  if (interpreter_env == nullptr)
+  {
+    std::cerr << "CIRCLE_INTERPRETER_PATH not found" << std::endl;
+    return false;
+  }
+  _circle_interpreter_path = interpreter_env;
+
+  return true;
+}
+
+/**
+ * @brief Execute command in a shell and capture everything it prints
+ *
+ * stderr is redirected to stdout so error messages are captured as well. On success the
+ * captured text is stored in _result and echoed to std::cout. The exit status of the
+ * child process is not examined.
+ *
+ * @return false when the pipe cannot be opened or an exception occurs while reading
+ */
+bool circle_interpreter_test::run(const std::string &command)
+{
+  std::vector<char> chunk(READSIZE);
+  std::string captured = "";
+  const std::string merged_command = command + " 2>&1";
+
+  FILE *stream = popen(merged_command.c_str(), "r");
+  if (stream == nullptr)
+  {
+    return false;
+  }
+
+  try
+  {
+    // fgets always NUL-terminates, so the chunk can be appended as a C string
+    while (fgets(chunk.data(), chunk.size(), stream) != NULL)
+    {
+      captured += chunk.data();
+    }
+  }
+  catch (...)
+  {
+    pclose(stream);
+    return false;
+  }
+  pclose(stream);
+
+  _result = captured;
+  std::cout << _result << std::endl;
+
+  return true;
+}
+
+/**
+ * @brief Compare the contents of two files byte by byte
+ *
+ * Both files are read in READSIZE chunks. Only the bytes actually delivered by each read
+ * are compared, so the result never depends on leftover buffer contents.
+ *
+ * @return true when both files can be opened and hold identical data, false otherwise
+ */
+bool circle_interpreter_test::compare(const std::string &file1, const std::string &file2)
+{
+  std::ifstream f1(file1.c_str(), std::ifstream::in | std::ifstream::binary);
+  std::ifstream f2(file2.c_str(), std::ifstream::in | std::ifstream::binary);
+
+  if (!f1.is_open() || !f2.is_open())
+  {
+    return false;
+  }
+
+  typedef unsigned char BYTE;
+  std::vector<BYTE> vBuffer1(READSIZE);
+  std::vector<BYTE> vBuffer2(READSIZE);
+
+  do
+  {
+    f1.read((char *)&vBuffer1[0], READSIZE);
+    std::streamsize f1_bytes = f1.gcount();
+    f2.read((char *)&vBuffer2[0], READSIZE);
+    std::streamsize f2_bytes = f2.gcount();
+
+    // different chunk lengths mean different file sizes
+    if (f1_bytes != f2_bytes)
+    {
+      return false;
+    }
+
+    // compare just the freshly read prefix of the buffers
+    if (!std::equal(vBuffer1.begin(), vBuffer1.begin() + f1_bytes, vBuffer2.begin()))
+    {
+      return false;
+    }
+  } while (f1.good() || f2.good());
+  return true;
+}
+
+// "-h" must make the interpreter print its usage banner.
+TEST_F(circle_interpreter_test, show_help_msg)
+{
+  if (!initialize())
+  {
+    FAIL(); // fatal failure: leaves the test body immediately
+  }
+
+  const std::string help_command = _circle_interpreter_path + " -h";
+  if (!run(help_command))
+  {
+    FAIL();
+  }
+
+  const auto pos = _result.find("Usage: ./circle-interpreter");
+  ASSERT_NE(std::string::npos, pos);
+}
+
+// A well-formed invocation must reproduce the reference output of Conv2D_000.
+TEST_F(circle_interpreter_test, valid_command)
+{
+  if (!initialize())
+  {
+    FAIL(); // fatal failure: leaves the test body immediately
+  }
+
+  const std::string model_path = _artifacts_path + "/Conv2D_000.circle";
+  const std::string input_prefix = _artifacts_path + "/Conv2D_000.circle.input";
+  const std::string output_prefix = "/tmp/Conv2D_000.circle.output";
+
+  // remove a stale result so a previous run cannot satisfy the comparison
+  const std::string generated_output = output_prefix + "0";
+  std::remove(generated_output.c_str());
+
+  const std::string invocation =
+    _circle_interpreter_path + " " + model_path + " " + input_prefix + " " + output_prefix;
+  if (!run(invocation))
+  {
+    FAIL();
+  }
+
+  const std::string expected_output = _artifacts_path + "/Conv2D_000.circle.output0";
+  if (!compare(generated_output, expected_output))
+  {
+    FAIL();
+  }
+}
+
+// Passing only the model (no input/output prefixes) must be reported as an invalid argument.
+TEST_F(circle_interpreter_test, invalid_option_NEG)
+{
+  if (!initialize())
+  {
+    FAIL(); // fatal failure: leaves the test body immediately
+  }
+
+  const std::string model_path = _artifacts_path + "/Conv2D_000.circle";
+  const std::string invocation = _circle_interpreter_path + " " + model_path;
+  if (!run(invocation))
+  {
+    FAIL();
+  }
+
+  const auto pos = _result.find("Invalid argument");
+  ASSERT_NE(std::string::npos, pos);
+}
+
+// A model path that does not exist must produce a "Failed to load" message.
+TEST_F(circle_interpreter_test, not_existing_model_NEG)
+{
+  if (!initialize())
+  {
+    FAIL(); // fatal failure: leaves the test body immediately
+  }
+
+  const std::string missing_model = _artifacts_path + "/non_exist_file.foo";
+  const std::string input_prefix = _artifacts_path + "/Conv2D_000.circle.input";
+  const std::string output_prefix = "/tmp/Conv2D_000.circle.output";
+  std::remove(output_prefix.c_str());
+
+  const std::string invocation =
+    _circle_interpreter_path + " " + missing_model + " " + input_prefix + " " + output_prefix;
+  if (!run(invocation))
+  {
+    FAIL();
+  }
+
+  const auto pos = _result.find("Failed to load");
+  ASSERT_NE(std::string::npos, pos);
+}
+
+// An input prefix that matches no file must produce a "Cannot open file" message.
+TEST_F(circle_interpreter_test, invalid_input_prefix_NEG)
+{
+  if (!initialize())
+  {
+    FAIL(); // fatal failure: leaves the test body immediately
+  }
+
+  const std::string model_path = _artifacts_path + "/Conv2D_000.circle";
+  const std::string missing_input_prefix = _artifacts_path + "/non_exist_file.foo";
+  const std::string output_prefix = "/tmp/Conv2D_000.circle.output";
+  std::remove(output_prefix.c_str());
+
+  const std::string invocation =
+    _circle_interpreter_path + " " + model_path + " " + missing_input_prefix + " " + output_prefix;
+  if (!run(invocation))
+  {
+    FAIL();
+  }
+
+  const auto pos = _result.find("Cannot open file");
+  ASSERT_NE(std::string::npos, pos);
+}
if (fs.fail())
throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
if (fs.read(data, data_size).fail())
- throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+ throw std::runtime_error("Input tensor size mismatches with \"" + filename + "\".\n");
+ if (fs.peek() != EOF)
+ throw std::runtime_error("Input tensor size mismatches with \"" + filename + "\".\n");
}
void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
--- /dev/null
+# Collect all sources, then keep the unit tests out of the executable.
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_executable(circle-mpqsolver "${SOURCES}")
+target_include_directories(circle-mpqsolver PRIVATE src)
+target_link_libraries(circle-mpqsolver arser)
+target_link_libraries(circle-mpqsolver vconone)
+target_link_libraries(circle-mpqsolver safemain)
+target_link_libraries(circle-mpqsolver luci_lang)
+target_link_libraries(circle-mpqsolver luci_service)
+target_link_libraries(circle-mpqsolver luci_pass)
+target_link_libraries(circle-mpqsolver luci_interpreter)
+target_link_libraries(circle-mpqsolver dio_hdf5)
+target_link_libraries(circle-mpqsolver luci_import)
+target_link_libraries(circle-mpqsolver luci_export)
+target_link_libraries(circle-mpqsolver luci_log)
+target_link_libraries(circle-mpqsolver nncc_common)
+
+install(TARGETS circle-mpqsolver DESTINATION bin)
+
+# Everything below configures the unit tests only.
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# circle-mpqsolver is an executable, so we do not link it to the test.
+# Instead, we use TEST_SOURCES to specify the sources used for tests.
+set(TEST_SOURCES
+ "src/bisection/DepthParameterizer.cpp"
+ "src/bisection/Quantizer.cpp"
+ "src/bisection/ErrorApproximator.cpp")
+
+nnas_find_package(GTest REQUIRED)
+GTest_AddTest(circle_mpqsolver_test ${TESTS} ${TEST_SOURCES})
+target_link_libraries(circle_mpqsolver_test luci_lang)
+target_link_libraries(circle_mpqsolver_test luci_service)
+target_link_libraries(circle_mpqsolver_test luci_pass)
--- /dev/null
+# circle-mpqsolver
+_circle-mpqsolver_ provides light-weight methods for finding a high-quality mixed-precision model
+within a reasonable time.
+
+## Methods
+
+### Bisection
+A model is split into two parts: front and back. One of them is quantized in uint8 and the other in
+int16. The precision of front and back is determined by our proxy metric, the upper bound of total
+layer errors. (See https://github.com/Samsung/ONE/pull/10170#discussion_r1042246598 for more details)
+
+The boundary between the front and the back is decided by the depth of operators (depth: distance
+from input to the operator), i.e., given a depth d, layers with a depth less than d are included
+in front, and the rest are included in back. Bisection performs binary search to find a proper
+depth which achieves a qerror less than target_qerror.
+
+In case front is quantized into Q16 the pseudocode is the following:
+```
+ until |_depth_max_ - _depth_min_| <=1 do
+ _current_depth_ = 0.5 * (_depth_max_ + _depth_min_)
+ if Loss(_current_depth_) < _target_loss_
+ _depth_max_ = _current_depth_
+ else
+ _depth_min_ = _current_depth_
+```
+, where Loss(current_depth) is the qerror of the mixed-precision model split at current_depth.
+As every iteration halves the remaining range (|depth_max - depth_min|), it converges in
+_~log2(max_depth)_ iterations.
+
+## Usage
+Run _circle-mpqsolver_ with the following arguments.
+
+--data: .h5 file with test data
+
+--input_model: Input float model initialized with min-max (recorded model)
+
+--output_model: Output quantized model
+
+--qerror_ratio: Target quantization error ratio. It should be in [0, 1]. 0 indicates qerror of full int16 model, 1 indicates qerror of full uint8 model. The lower `qerror_ratio` indicates the more accurate solution.
+
+--bisection _mode_: input nodes should be at Q16 precision ['auto', 'true', 'false']
+
+```
+$ ./circle-mpqsolver
+ --data <.h5 data>
+ --input_model <input_recorded_model>
+ --output_model <output_model_path>
+ --qerror_ratio <optional value for reproducing target _qerror_ default is 0.5>
+ --bisection <whether input nodes should be quantized into Q16 default is 'auto'>
+```
+
+For example:
+```
+$ ./circle-mpqsolver
+ --data dataset.h5
+ --input_model model.recorded.circle
+ --output_model model.q_opt.circle
+ --qerror_ratio 0.4f
+ --bisection true
+```
+
+It will produce _model.q_opt.circle_, which is _model.recorded.circle_ quantized to mixed precision
+using _dataset.h5_, with input nodes set to _Q16_ precision and quantization error (_qerror_) of
+_model.q_opt.circle_ will be less than
+```
+ qerror(full_q16) + qerror_ratio * (qerror(full_q8) - qerror(full_q16))
+ ```
+ (_full_q16_ - model quantized using Q16 precision, _full_q8_ - model quantized using Q8 precision).
--- /dev/null
+require("safemain")
+require("arser")
+require("vconone")
+require("luci")
+require("luci-interpreter")
+require("dio-hdf5")
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+#include <luci/Log.h>
+
+#include "bisection/BisectionSolver.h"
+
+#include <iostream>
+#include <iomanip>
+#include <chrono>
+
+// Prints the tool name with its version string, followed by the copyright notice.
+void print_version(void)
+{
+  const auto version = vconone::get_string();
+  std::cout << "circle-mpqsolver version " << version << std::endl;
+
+  const auto copyright = vconone::get_copyright();
+  std::cout << copyright << std::endl;
+}
+
+/**
+ * @brief Command-line entry point of circle-mpqsolver
+ *
+ * Parses the arguments, runs the bisection solver on the recorded model given by
+ * --input_model and exports the resulting mixed-precision model to --output_model.
+ *
+ * @return EXIT_SUCCESS on success; EXIT_FAILURE on argument errors, an out-of-range
+ *         --qerror_ratio, a missing or unknown solver option, or a solver/export failure
+ */
+int entry(int argc, char **argv)
+{
+  LOGGER(l);
+
+  const std::string bisection_str = "--bisection";
+
+  arser::Arser arser("circle-mpqsolver provides light-weight methods for finding a high-quality "
+                     "mixed-precision model within a reasonable time.");
+
+  arser::Helper::add_version(arser, print_version);
+  arser::Helper::add_verbose(arser);
+
+  arser.add_argument("--data").required(true).help("Path to the test data");
+  // NOTE: --data_format is accepted but its value is not read anywhere in this function
+  arser.add_argument("--data_format").required(false).help("Test data format (default: h5)");
+
+  arser.add_argument("--qerror_ratio")
+    .type(arser::DataType::FLOAT)
+    .default_value(0.5f)
+    .help("quantization error ratio ([0, 1])");
+
+  arser.add_argument(bisection_str)
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .help("Single optional argument for bisection method. "
+          "Whether input node should be quantized to Q16: 'auto', 'true', 'false'.");
+
+  arser.add_argument("--input_model")
+    .required(true)
+    .help("Input float model with min max initialized");
+
+  arser.add_argument("--input_dtype")
+    .type(arser::DataType::STR)
+    .default_value("uint8")
+    .help("Data type of quantized model's inputs (default: uint8)");
+
+  arser.add_argument("--output_dtype")
+    .type(arser::DataType::STR)
+    .default_value("uint8")
+    .help("Data type of quantized model's outputs (default: uint8)");
+
+  arser.add_argument("--output_model").required(true).help("Output quantized model");
+
+  try
+  {
+    arser.parse(argc, argv);
+  }
+  catch (const std::runtime_error &err)
+  {
+    std::cerr << err.what() << std::endl;
+    std::cout << arser;
+    return EXIT_FAILURE;
+  }
+
+  if (arser.get<bool>("--verbose"))
+  {
+    // The third parameter of setenv means REPLACE.
+    // If REPLACE is zero, it does not overwrite an existing value.
+    setenv("LUCI_LOG", "100", 0);
+  }
+
+  auto data_path = arser.get<std::string>("--data");
+  auto input_model_path = arser.get<std::string>("--input_model");
+  auto output_model_path = arser.get<std::string>("--output_model");
+  auto input_dtype = arser.get<std::string>("--input_dtype");
+  auto output_dtype = arser.get<std::string>("--output_dtype");
+
+  float qerror_ratio = arser.get<float>("--qerror_ratio");
+  if (qerror_ratio < 0.f || qerror_ratio > 1.f)
+  {
+    std::cerr << "ERROR: quantization ratio must be in [0, 1]" << std::endl;
+    return EXIT_FAILURE;
+  }
+  auto start = std::chrono::high_resolution_clock::now();
+
+  if (arser[bisection_str])
+  {
+    // optimize
+    using namespace mpqsolver::bisection;
+
+    BisectionSolver solver(data_path, qerror_ratio, input_dtype, output_dtype);
+    {
+      auto value = arser.get<std::string>(bisection_str);
+      if (value == "auto")
+      {
+        solver.algorithm(BisectionSolver::Algorithm::Auto);
+      }
+      else if (value == "true")
+      {
+        solver.algorithm(BisectionSolver::Algorithm::ForceQ16Front);
+      }
+      else if (value == "false")
+      {
+        solver.algorithm(BisectionSolver::Algorithm::ForceQ16Back);
+      }
+      else
+      {
+        // report the offending option value, not the model path
+        std::cerr << "ERROR: Unrecognized option for bisection algorithm: " << value << std::endl;
+        return EXIT_FAILURE;
+      }
+    }
+
+    auto optimized = solver.run(input_model_path);
+    if (optimized == nullptr)
+    {
+      std::cerr << "ERROR: Failed to build mixed precision model: " << input_model_path
+                << std::endl;
+      return EXIT_FAILURE;
+    }
+
+    // save optimized
+    {
+      luci::CircleExporter exporter;
+      luci::CircleFileExpContract contract(optimized.get(), output_model_path);
+      if (!exporter.invoke(&contract))
+      {
+        // the export target is the output path
+        std::cerr << "ERROR: Failed to export mixed precision model: " << output_model_path
+                  << std::endl;
+        return EXIT_FAILURE;
+      }
+    }
+  }
+  else
+  {
+    // bisection is the only solver implemented here
+    std::cerr << "ERROR: Unrecognized solver" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  auto duration = std::chrono::duration_cast<std::chrono::seconds>(
+    std::chrono::high_resolution_clock::now() - start);
+  VERBOSE(l, 0) << "Elapsed Time: " << std::setprecision(5) << duration.count() / 60.f
+                << " minutes." << std::endl;
+
+  return EXIT_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MPQSolver.h"
+
+using namespace mpqsolver;
+
+// Stores the solver configuration. The initializers are listed in the order the members
+// are declared in MPQSolver.h (paths/dtypes first, ratio last), which is the order in
+// which they are actually initialized; this avoids -Wreorder diagnostics.
+MPQSolver::MPQSolver(const std::string &input_data_path, float qerror_ratio,
+                     const std::string &input_quantization, const std::string &output_quantization)
+  : _input_data_path(input_data_path), _input_quantization(input_quantization),
+    _output_quantization(output_quantization), _qerror_ratio(qerror_ratio)
+{
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_MPQSOLEVR_SOLVER_H__
+#define __MPQSOLVER_MPQSOLEVR_SOLVER_H__
+
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <string>
+
+namespace mpqsolver
+{
+
+/**
+ * @brief Abstract base of mixed-precision quantization solvers; holds the shared
+ *        configuration and declares the run() interface implemented by concrete solvers
+ */
+class MPQSolver
+{
+public:
+ /**
+ * @brief construct Solver using input_data_path for .h5 file,
+ * qerror_ratio to set target qerror, and input_quantization/output_quantization to set
+ * quantization type at input/output respectively
+ */
+ MPQSolver(const std::string &input_data_path, float qerror_ratio,
+ const std::string &input_quantization, const std::string &output_quantization);
+ virtual ~MPQSolver() = default;
+
+ /**
+ * @brief run solver for recorded float module at module_path
+ */
+ virtual std::unique_ptr<luci::Module> run(const std::string &module_path) = 0;
+
+protected:
+ std::string _input_data_path;     // path of the data file passed to the constructor
+ std::string _input_quantization;  // quantization type requested for model inputs
+ std::string _output_quantization; // quantization type requested for model outputs
+ float _qerror_ratio = 0.f; // quantization error ratio
+};
+
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_MPQSOLEVR_SOLVER_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BisectionSolver.h"
+#include "DepthParameterizer.h"
+#include "ErrorMetric.h"
+#include "ErrorApproximator.h"
+
+#include <luci/ImporterEx.h>
+#include <luci/Log.h>
+
+#include <cmath>
+#include <iostream>
+
+using namespace mpqsolver::bisection;
+
+namespace
+{
+
+/**
+ * @brief Tell whether the approximated error of nodes shallower than cut_depth exceeds
+ *        the approximated error of the remaining nodes
+ *
+ * Both sums and the resulting decision are reported through the verbose log.
+ */
+bool error_at_input_is_larger_than_at_output(const NodeDepthType &nodes_depth, float cut_depth)
+{
+  LOGGER(l);
+
+  float error_at_input = 0;
+  float error_at_output = 0;
+  for (auto &depth_entry : nodes_depth)
+  {
+    const float node_error = approximate(depth_entry.first);
+    // nodes strictly before the cut count as "input" side, all others as "output" side
+    float &side_total = (depth_entry.second < cut_depth) ? error_at_input : error_at_output;
+    side_total += node_error;
+  }
+
+  const bool input_dominates = error_at_input > error_at_output;
+
+  if (input_dominates)
+  {
+    VERBOSE(l, 0) << "Q16 will be set at input due to ";
+  }
+  else
+  {
+    VERBOSE(l, 0) << "Q8 will be set at input due to ";
+  }
+  VERBOSE(l, 0) << error_at_input << " error at input vs ";
+  VERBOSE(l, 0) << error_at_output << " error at output." << std::endl;
+
+  return input_dominates;
+}
+
+// Imports and verifies the circle module stored at path.
+// Returns nullptr (after reporting on stderr) when the module cannot be loaded.
+std::unique_ptr<luci::Module> read_module(const std::string &path)
+{
+  luci::ImporterEx importerex;
+  auto loaded = importerex.importVerifyModule(path);
+  if (loaded.get() != nullptr)
+  {
+    return loaded;
+  }
+
+  std::cerr << "ERROR: Failed to load " << path << std::endl;
+  return nullptr;
+}
+
+} // namespace
+
+// Forwards the configuration to MPQSolver and creates the quantizer from the
+// input/output quantization types stored by the base class.
+BisectionSolver::BisectionSolver(const std::string &input_data_path, float qerror_ratio,
+                                 const std::string &input_quantization,
+                                 const std::string &output_quantization)
+  : MPQSolver(input_data_path, qerror_ratio, input_quantization, output_quantization)
+{
+  _quantizer.reset(new Quantizer(_input_quantization, _output_quantization));
+}
+
+/**
+ * @brief Reload the float model, fake-quantize it and measure its error
+ *
+ * @param evaluator  evaluator that computes the error of the fake-quantized module
+ * @param flt_path   path of the recorded float model; it is re-read on every call
+ * @param def_quant  default quantization dtype passed to the quantizer
+ * @param layers     per-layer overrides passed to the quantizer
+ * @return value reported by evaluator for the fake-quantized module
+ * @throws std::runtime_error when the model cannot be loaded or fake quantization fails
+ */
+float BisectionSolver::evaluate(const DatasetEvaluator &evaluator, const std::string &flt_path,
+                                const std::string &def_quant, LayerParams &layers)
+{
+  auto model = read_module(flt_path);
+  // read_module returns nullptr on failure; do not hand a null module to the quantizer
+  if (model == nullptr)
+  {
+    throw std::runtime_error("Failed to load model for evaluation.");
+  }
+
+  // get fake quantized model for evaluation
+  if (!_quantizer->fake_quantize(model.get(), def_quant, layers))
+  {
+    throw std::runtime_error("Failed to produce fake-quantized model.");
+  }
+
+  return evaluator.evaluate(model.get());
+}
+
+// Selects the strategy that run() uses to decide which side of the model gets Q16.
+void BisectionSolver::algorithm(Algorithm algorithm)
+{
+  this->_algorithm = algorithm;
+}
+
+/**
+ * @brief Search for a mixed-precision configuration by bisection over node depth
+ *
+ * The full-int16 and full-uint8 errors define the target error
+ * (int16 error + ratio * difference). If the uint8 model already meets the target it is
+ * quantized and returned directly. Otherwise the cut depth is bisected: nodes on the Q16
+ * side of the cut are overridden to int16 and the remaining nodes stay uint8.
+ *
+ * @param module_path path of the recorded float model
+ * @return quantized module, or nullptr when the graph is unsuitable or quantization fails
+ * @throws std::runtime_error when the uint8 error is smaller than the int16 error, when the
+ *         module does not contain exactly one graph, or when evaluation fails
+ */
+std::unique_ptr<luci::Module> BisectionSolver::run(const std::string &module_path)
+{
+  LOGGER(l);
+
+  auto module = read_module(module_path);
+
+  float min_depth = 0.f;
+  float max_depth = 0.f;
+  NodeDepthType nodes_depth;
+  // compute_depth rejects a null module, so a failed load is reported here as well
+  if (compute_depth(module.get(), nodes_depth, min_depth, max_depth) !=
+      ParameterizerResult::SUCCESS)
+  {
+    std::cerr << "ERROR: Invalid graph for bisectioning" << std::endl;
+    return nullptr;
+  }
+
+  std::unique_ptr<MAEMetric> metric = std::make_unique<MAEMetric>();
+  DatasetEvaluator evaluator(module.get(), _input_data_path, *metric.get());
+
+  LayerParams layer_params;
+  float int16_qerror =
+    evaluate(evaluator, module_path, "int16" /* default quant_dtype */, layer_params);
+  VERBOSE(l, 0) << "Full int16 model quantization error " << int16_qerror << std::endl;
+
+  float uint8_qerror =
+    evaluate(evaluator, module_path, "uint8" /* default quant_dtype */, layer_params);
+  VERBOSE(l, 0) << "Full uint8 model quantization error " << uint8_qerror << std::endl;
+
+  if (int16_qerror > uint8_qerror)
+  {
+    throw std::runtime_error("Q8 model's qerror is less than Q16 model's qerror.");
+  }
+
+  _qerror = int16_qerror + _qerror_ratio * std::fabs(uint8_qerror - int16_qerror);
+  VERBOSE(l, 0) << "Target quantization error " << _qerror << std::endl;
+
+  if (uint8_qerror <= _qerror)
+  {
+    // no need for bisectioning just return Q8 model
+    if (!_quantizer->quantize(module.get(), "uint8", layer_params))
+    {
+      std::cerr << "ERROR: Failed to quantize model" << std::endl;
+      return nullptr;
+    }
+    // the module is already quantized; running the bisection below would quantize it again
+    return module;
+  }
+
+  int last_depth = -1;
+  float best_depth = -1;
+  LayerParams best_params;
+  if (module->size() != 1)
+  {
+    throw std::runtime_error("Unsupported module");
+  }
+  auto graph = module->graph(0);
+  auto active_nodes = loco::active_nodes(loco::output_nodes(graph));
+  // input and output nodes are not valid for quantization, so let's remove them
+  for (auto node : loco::input_nodes(graph))
+  {
+    active_nodes.erase(node);
+  }
+  for (auto node : loco::output_nodes(graph))
+  {
+    active_nodes.erase(node);
+  }
+
+  // let's decide whether nodes at input are more susceptible to be quantized into Q16, than at
+  // output
+  bool int16_front = true;
+  switch (_algorithm)
+  {
+    case Algorithm::Auto:
+      int16_front =
+        error_at_input_is_larger_than_at_output(nodes_depth, 0.5f * (max_depth + min_depth));
+      break;
+    case Algorithm::ForceQ16Front:
+      int16_front = true;
+      break;
+    case Algorithm::ForceQ16Back:
+      // Q16 is forced at the back, so the front must stay Q8
+      int16_front = false;
+      break;
+  }
+
+  while (true)
+  {
+    int cut_depth = static_cast<int>(std::floor(0.5f * (min_depth + max_depth)));
+
+    // the integer cut stopped moving: the search interval cannot shrink any further
+    if (last_depth == cut_depth)
+    {
+      break;
+    }
+    last_depth = cut_depth;
+
+    LayerParams layer_params;
+    for (auto &node : active_nodes)
+    {
+      auto cur_node = loco::must_cast<luci::CircleNode *>(node);
+      auto iter = nodes_depth.find(cur_node);
+      if (iter == nodes_depth.end())
+      {
+        continue; // to filter out nodes like weights
+      }
+
+      float depth = iter->second;
+
+      // nodes on the Q16 side of the cut get an explicit int16 override
+      if ((depth <= cut_depth && int16_front) || (depth >= cut_depth && !int16_front))
+      {
+        auto layer_param = std::make_shared<LayerParam>();
+        {
+          layer_param->name = cur_node->name();
+          layer_param->dtype = "int16";
+          layer_param->granularity = "channel";
+        }
+
+        layer_params.emplace_back(layer_param);
+      }
+    }
+
+    float cur_accuracy = evaluate(evaluator, module_path, "uint8", layer_params);
+    VERBOSE(l, 0) << cut_depth << " : " << cur_accuracy << std::endl;
+
+    if (cur_accuracy < _qerror)
+    {
+      // target met: remember this configuration and try a smaller Q16 region
+      int16_front ? (max_depth = cut_depth) : (min_depth = cut_depth);
+      best_params = layer_params;
+      best_depth = cut_depth;
+    }
+    else
+    {
+      // target missed: enlarge the Q16 region
+      int16_front ? (min_depth = cut_depth) : (max_depth = cut_depth);
+    }
+  }
+
+  VERBOSE(l, 0) << "Found the best configuration at " << best_depth << " depth." << std::endl;
+  if (!_quantizer->quantize(module.get(), "uint8", best_params))
+  {
+    std::cerr << "ERROR: Failed to quantize model" << std::endl;
+    return nullptr;
+  }
+
+  return module;
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_SOLVER_H__
+#define __MPQSOLVER_BISECTION_SOLVER_H__
+
+#include "Quantizer.h"
+#include "Evaluator.h"
+#include <MPQSolver.h>
+
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <string>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+/**
+ * @brief Solver that bisects over node depth to choose which nodes are quantized to int16
+ */
+class BisectionSolver final : public MPQSolver
+{
+public:
+ /**
+ * @brief Algorithm options for running bisection algorithm
+ */
+ enum Algorithm
+ {
+ Auto,          // side is chosen by comparing approximated errors at input and output
+ ForceQ16Front, // Q16 is placed at the front (input side)
+ ForceQ16Back,  // intended to place Q16 at the back (output side)
+ };
+
+public:
+ /**
+ * @brief construct Solver using input_data_path for .h5 file,
+ * qerror_ratio to set target qerror, and input_quantization/output_quantization to set
+ * quantization type at input/output respectively
+ */
+ BisectionSolver(const std::string &input_data_path, float qerror_ratio,
+ const std::string &input_quantization, const std::string &output_quantization);
+ BisectionSolver() = delete;
+
+ /**
+ * @brief run bisection for recorded float module at module_path
+ */
+ std::unique_ptr<luci::Module> run(const std::string &module_path) override;
+
+ /**
+ * @brief set used algorithm
+ */
+ void algorithm(Algorithm algorithm);
+
+private:
+ /**
+ * @brief fake-quantize the model at module_path with def_quant/layers and return the
+ * value computed by evaluator for it
+ */
+ float evaluate(const DatasetEvaluator &evaluator, const std::string &module_path,
+ const std::string &def_quant, LayerParams &layers);
+
+private:
+ float _qerror = 0.f; // quantization error
+ Algorithm _algorithm = Algorithm::ForceQ16Front; // strategy used by run(); see algorithm()
+ std::unique_ptr<Quantizer> _quantizer; // created in the constructor
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_BISECTION_SOLVER_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthParameterizer.h"
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+/**
+ * @brief compute maximal distance from graph inputs to graph nodes along with min/max values of
+ * distance and return status of computation (Assumes graph has no cycles)
+ *
+ * @param module       module to analyze; must hold exactly one graph
+ * @param nodes_depth  receives the depth of every node reachable from the graph inputs
+ * @param min_depth    receives the smallest depth stored in nodes_depth
+ * @param max_depth    receives the largest depth stored in nodes_depth
+ * @return FAILURE for a null module, a module without exactly one graph, a null graph or
+ *         when no depth could be computed at all; SUCCESS otherwise
+ */
+ParameterizerResult compute_depth(const luci::Module *module, NodeDepthType &nodes_depth,
+                                  float &min_depth, float &max_depth)
+{
+  if (module == nullptr)
+    return ParameterizerResult::FAILURE;
+
+  if (module->size() != 1)
+    return ParameterizerResult::FAILURE;
+
+  auto graph = module->graph(0);
+  if (!graph)
+    return ParameterizerResult::FAILURE;
+
+  // initializing: graph inputs are the roots of the traversal and have depth 0
+  std::vector<luci::CircleNode *> to_process;
+  {
+    auto inputs = loco::input_nodes(graph);
+    for (auto &node : inputs)
+    {
+      auto cnode = loco::must_cast<luci::CircleNode *>(node);
+      to_process.emplace_back(cnode);
+      nodes_depth[cnode] = 0.f;
+    }
+  }
+
+  // enumerating
+  while (!to_process.empty())
+  {
+    auto cur_node = to_process.back();
+    to_process.pop_back();
+    auto iter = nodes_depth.find(cur_node);
+    if (iter == nodes_depth.end())
+    {
+      return ParameterizerResult::FAILURE; // unexpected
+    }
+    float cur_depth = iter->second + 1;
+    // processing children
+    auto children = loco::succs(cur_node);
+    for (auto &child : children)
+    {
+      auto cichild = loco::must_cast<luci::CircleNode *>(child);
+      auto node_depth = nodes_depth.find(cichild);
+      // a longer path was found (or the node is new): raise its depth and revisit it so
+      // that its successors are updated too
+      if (node_depth == nodes_depth.end() || node_depth->second < cur_depth)
+      {
+        nodes_depth[cichild] = cur_depth;
+        to_process.push_back(cichild);
+      }
+    }
+  }
+
+  // without any entry minmax_element returns end iterators, which must not be dereferenced
+  if (nodes_depth.empty())
+    return ParameterizerResult::FAILURE;
+
+  // compute min/max of depth parameter
+  auto minmax = std::minmax_element(
+    nodes_depth.begin(), nodes_depth.end(),
+    [](const NodeDepthType::value_type &el1, const NodeDepthType::value_type &el2) {
+      return el1.second < el2.second;
+    });
+
+  min_depth = minmax.first->second;
+  max_depth = minmax.second->second;
+
+  return ParameterizerResult::SUCCESS;
+}
+
+} // namespace bisection
+} // namespace mpqsolver
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_DEPTH_PARAMETERIZER_H__
+#define __MPQSOLVER_DEPTH_PARAMETERIZER_H__
+
+#include <luci/IR/Module.h>
+#include <luci/IR/CircleNodeDecl.h>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+// Maps a node to its depth (distance from the graph inputs, see compute_depth)
+using NodeDepthType = std::map<luci::CircleNode *, float>;
+
+/**
+ * @brief status of parameterization
+ */
+enum class ParameterizerResult : int32_t
+{
+ SUCCESS = 0,
+ FAILURE = 1
+};
+
+/**
+ * @brief compute maximal distance from graph inputs to graph nodes along with min/max values of
+ * distance and return status of computation (success/failure)
+ */
+ParameterizerResult compute_depth(const luci::Module *module, NodeDepthType &nodes_depth,
+ float &min_depth, float &max_depth);
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_DEPTH_PARAMETERIZER_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "DepthParameterizer.h"
+#include "TestHelper.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * @brief SimpleGraph whose body is one FLOAT32 Conv2D fed by constant filter and bias nodes
+ */
+class NConvGraph final : public SimpleGraph
+{
+protected:
+  /**
+   * @brief create filter, bias and convolution nodes and wire 'input' into the convolution
+   * @return the convolution node, which becomes the output of the graph body
+   */
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    auto node_pool = _g->nodes();
+
+    // filter of shape {C, 1, 1, C}, C being the channel size of the base graph
+    _filter = node_pool->create<luci::CircleConst>();
+    _filter->name("conv_filter");
+    _filter->dtype(loco::DataType::FLOAT32);
+    _filter->shape({_channel_size, 1, 1, _channel_size});
+
+    // one bias value per channel
+    _bias = node_pool->create<luci::CircleConst>();
+    _bias->name("conv_bias");
+    _bias->dtype(loco::DataType::FLOAT32);
+    _bias->shape({_channel_size});
+
+    _conv = node_pool->create<luci::CircleConv2D>();
+    _conv->name("conv");
+    _conv->dtype(loco::DataType::FLOAT32);
+    _conv->shape({1, _width, _height, _channel_size});
+    _conv->padding(luci::Padding::SAME);
+    _conv->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _conv->input(input);
+    _conv->filter(_filter);
+    _conv->bias(_bias);
+
+    return _conv;
+  }
+
+public:
+  // nodes created by insertGraphBody, exposed so that tests can inspect them
+  luci::CircleConv2D *_conv = nullptr;
+  luci::CircleConst *_filter = nullptr;
+  luci::CircleConst *_bias = nullptr;
+};
+
+} // namespace
+
+// input -> conv -> output chain: expected depths are 0, 1 and 2 respectively
+TEST(CircleMPQSolverDepthParameteriserTest, verifyResultsTest)
+{
+  NConvGraph g;
+  g.init();
+
+  // remember the nodes before the graph is handed over to the module
+  auto input = g._input;
+  auto conv = g._conv;
+  auto output = g._output;
+
+  auto m = luci::make_module();
+  g.transfer_to(m.get());
+
+  // start from inverted extremes so that any computed depth replaces them
+  float min_depth = std::numeric_limits<float>::max();
+  float max_depth = -std::numeric_limits<float>::max();
+  mpqsolver::bisection::NodeDepthType nodes_depth;
+  const auto status =
+    mpqsolver::bisection::compute_depth(m.get(), nodes_depth, min_depth, max_depth);
+
+  EXPECT_TRUE(status == mpqsolver::bisection::ParameterizerResult::SUCCESS);
+  EXPECT_EQ(2.f, max_depth);
+  EXPECT_EQ(0.f, min_depth);
+  EXPECT_EQ(min_depth, nodes_depth[input]);
+  EXPECT_EQ(1.f, nodes_depth[conv]);
+  EXPECT_EQ(max_depth, nodes_depth[output]);
+}
+
+// a freshly made module, to which no graph was added, must be reported as FAILURE
+TEST(CircleMPQSolverDepthParameteriserTest, verifyResultsTest_NEG)
+{
+  auto empty_module = luci::make_module();
+
+  float min_depth = std::numeric_limits<float>::max();
+  float max_depth = -std::numeric_limits<float>::max();
+  mpqsolver::bisection::NodeDepthType nodes_depth;
+  const auto status =
+    mpqsolver::bisection::compute_depth(empty_module.get(), nodes_depth, min_depth, max_depth);
+
+  EXPECT_TRUE(status == mpqsolver::bisection::ParameterizerResult::FAILURE);
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ErrorApproximator.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <stdexcept>
+#include <vector>
+
+namespace
+{
+
+using namespace luci;
+using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
+
+// True when 'node' carries quantization parameters with both min and max recorded
+inline bool has_min_max(const CircleNode *node)
+{
+  const auto qparam = node->quantparam();
+  if (!qparam)
+    return false;
+
+  return !(qparam->min.empty() || qparam->max.empty());
+}
+
+// Row-major linear offset of the 4-D position 'indices' inside a tensor of shape 'dimension'
+// (Horner form of i0*d1*d2*d3 + i1*d2*d3 + i2*d3 + i3)
+inline uint32_t cal_offset(const loco::TensorShape &dimension, uint32_t *indices)
+{
+  uint32_t offset = indices[0];
+  for (uint32_t axis = 1; axis < 4; ++axis)
+  {
+    offset = offset * dimension.dim(axis).value() + indices[axis];
+  }
+  return offset;
+}
+
+/**
+ * @brief index of the weight dimension that enumerates channels for 'node'
+ *
+ * Returns 3 for depthwise convolution and 0 for convolution, transposed convolution and
+ * fully connected nodes.
+ *
+ * @throws std::runtime_error for any other opcode
+ */
+uint32_t get_channel_dim_index(const CircleNode *node)
+{
+  switch (node->opcode())
+  {
+    case CircleOpcode::DEPTHWISE_CONV_2D:
+      return 3;
+    case CircleOpcode::CONV_2D:
+    case CircleOpcode::TRANSPOSE_CONV:
+    case CircleOpcode::FULLY_CONNECTED:
+      return 0;
+    default:
+      break;
+  }
+
+  throw std::runtime_error("Failed to find channel index in " + node->name());
+}
+
+/**
+ * @brief fill 'dimension' with a rank-4 description of the weights of 'node'
+ *
+ * Convolution-like nodes copy the four weight dimensions as they are; fully connected weights
+ * {d0, d1} are presented as {d0, 1, 1, d1}.
+ *
+ * @return false when the opcode of 'node' is not handled ('dimension' is left untouched)
+ */
+bool set_weight_dim(const CircleNode *node, const CircleConst *weights,
+                    loco::TensorShape &dimension)
+{
+  switch (node->opcode())
+  {
+    case CircleOpcode::CONV_2D:
+    case CircleOpcode::TRANSPOSE_CONV:
+    case CircleOpcode::DEPTHWISE_CONV_2D:
+    {
+      assert(node->rank() == 4);
+      dimension.rank(node->rank());
+      for (uint32_t axis = 0; axis < 4; ++axis)
+      {
+        dimension.dim(axis).set(weights->dim(axis).value());
+      }
+      return true;
+    }
+    case CircleOpcode::FULLY_CONNECTED:
+    {
+      assert(node->rank() == 2);
+      dimension.rank(4);
+      dimension.dim(0).set(weights->dim(0).value());
+      // Set FC layer like CONV: the two middle (kernel) dimensions are unity
+      dimension.dim(1).set(1);
+      dimension.dim(2).set(1);
+      dimension.dim(3).set(weights->dim(1).value());
+      return true;
+    }
+    default:
+      return false;
+  }
+}
+
+/**
+ * @brief node connected as filter (convolutions) or weights (fully connected) of 'node'
+ * @return nullptr when 'node' is of any other kind
+ */
+loco::Node *get_weight(const CircleNode *node)
+{
+  switch (node->opcode())
+  {
+    case CircleOpcode::CONV_2D:
+      return loco::must_cast<const CircleConv2D *>(node)->filter();
+    case CircleOpcode::DEPTHWISE_CONV_2D:
+      return loco::must_cast<const CircleDepthwiseConv2D *>(node)->filter();
+    case CircleOpcode::TRANSPOSE_CONV:
+      return loco::must_cast<const CircleTransposeConv *>(node)->filter();
+    case CircleOpcode::FULLY_CONNECTED:
+      return loco::must_cast<const CircleFullyConnected *>(node)->weights();
+    default:
+      return nullptr;
+  }
+}
+
+/**
+ * @brief weights of 'node' as a constant node
+ * @throws std::runtime_error when 'node' has no weights or they are not a CircleConst
+ */
+inline CircleConst *get_constant_weight(const CircleNode *node)
+{
+  if (auto constant = dynamic_cast<CircleConst *>(get_weight(node)))
+  {
+    return constant;
+  }
+
+  throw std::runtime_error("Unsupported non-constant weights in convolution node " +
+                           node->name());
+}
+
+/**
+ * @brief call 'func' once for every element position of the (rank-4 view of the) weights
+ *
+ * Positions are visited in row-major order; 'func' receives the current indices, the weight
+ * shape and the index of the channel dimension.
+ */
+void iterate_per_channel(const CircleNode *node, IterFunc func)
+{
+  CircleConst *weight = get_constant_weight(node);
+
+  loco::TensorShape dimension;
+  set_weight_dim(node, weight, dimension);
+
+  uint32_t indices[4] = {0, 0, 0, 0};
+
+  auto channel_dim_index = get_channel_dim_index(node);
+
+  // aliases keep the loop headers short; 'func' still sees the whole array
+  uint32_t &i0 = indices[0];
+  uint32_t &i1 = indices[1];
+  uint32_t &i2 = indices[2];
+  uint32_t &i3 = indices[3];
+
+  for (i0 = 0; i0 < dimension.dim(0).value(); ++i0)
+    for (i1 = 0; i1 < dimension.dim(1).value(); ++i1)
+      for (i2 = 0; i2 < dimension.dim(2).value(); ++i2)
+        for (i3 = 0; i3 < dimension.dim(3).value(); ++i3)
+        {
+          func(indices, dimension, channel_dim_index);
+        }
+}
+
+/**
+ * @brief collect minimal and maximal FLOAT32 weight value of every channel of 'node'
+ *
+ * On return 'min' and 'max' hold one entry per channel, indexed by channel.
+ */
+void cal_minmax_per_channel(const CircleNode *node, std::vector<float> &min,
+                            std::vector<float> &max)
+{
+  CircleConst *weight = get_constant_weight(node);
+
+  loco::TensorShape dimension;
+  set_weight_dim(node, weight, dimension);
+
+  auto channel_dim_index = get_channel_dim_index(node);
+  auto size = dimension.dim(channel_dim_index).value();
+
+  // seen[c] tells whether min[c]/max[c] already hold a real weight value
+  std::vector<bool> seen(size, false);
+  min.resize(size);
+  max.resize(size);
+
+  auto update_channel = [&](uint32_t *indices, loco::TensorShape &shape, uint32_t ch_dim) {
+    const uint32_t ch = indices[ch_dim];
+    const auto value = weight->at<loco::DataType::FLOAT32>(cal_offset(shape, indices));
+    if (!seen[ch])
+    {
+      // first value of the channel initializes both bounds
+      min[ch] = value;
+      max[ch] = value;
+      seen[ch] = true;
+      return;
+    }
+
+    if (value < min[ch])
+      min[ch] = value;
+    if (value > max[ch])
+      max[ch] = value;
+  };
+
+  iterate_per_channel(node, update_channel);
+}
+
+/**
+ * @brief copy the dimensions of 'circle_node' into 'shape'
+ * @return true only when the shape status is VALID and the rank is 4; otherwise 'shape' is
+ *         left as it was
+ */
+bool get_shape(const CircleNode *circle_node, std::vector<uint32_t> &shape)
+{
+  if (circle_node->shape_status() != ShapeStatus::VALID)
+    return false;
+
+  const auto rank = circle_node->rank();
+  if (rank != 4)
+    return false;
+
+  shape.resize(rank);
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    shape[axis] = circle_node->dim(axis).value();
+  }
+
+  return true;
+}
+
+/**
+ * @brief get_additions_per_channel computes W * H * CIN * KW * KH.
+ *
+ * W, H - width/height of OFM; KW, KH - convolution kernel width/height;
+ * CIN - number of channels in IFM (for depthwise its unity)
+ * See
+ * https://github.com/Samsung/ONE/pull/10170#discussion_r1065371638
+ * for derivation.
+ *
+ * When the shape of the weights is not available (not VALID or not rank 4), the kernel and
+ * CIN factors are left out and only W * H is returned.
+ *
+ * @throws std::runtime_error when the shape of 'node' itself is not a valid rank-4 shape
+ */
+uint32_t get_additions_per_channel(const CircleNode *node)
+{
+  std::vector<uint32_t> ofm_shape;
+  if (!get_shape(node, ofm_shape)) // [BATCH, W, H, channels_out]
+  {
+    throw std::runtime_error("Failed to find correct shape " + node->name());
+  }
+
+  uint32_t adds_per_channel = ofm_shape[1] * ofm_shape[2]; // W * H
+
+  auto weights = loco::must_cast<CircleNode *>(get_weight(node));
+  std::vector<uint32_t> w_shape;
+  if (get_shape(weights, w_shape)) // [channels_out, k_x, k_y, channels_in]
+  {
+    adds_per_channel *= (w_shape[1] * w_shape[2]); // adds_per_channel *= k_x * k_y
+
+    // w_shape is only populated when get_shape succeeded, so CIN must be read inside this
+    // branch; reading it unconditionally would index an empty vector
+    if (node->opcode() != CircleOpcode::DEPTHWISE_CONV_2D)
+    {
+      // for not depthwise convolutions we need to scale it by CIN
+      adds_per_channel *= w_shape[3]; // adds_per_channel *= c_in
+    }
+  }
+
+  return adds_per_channel;
+}
+
+/**
+ * @brief read recorded min/max (first entry) from the predecessors of 'node'
+ *
+ * 'ci_min' / 'ci_max' are overwritten by every predecessor that has min/max recorded, so the
+ * one visited last wins; they are left untouched when no predecessor qualifies.
+ */
+void get_min_max_ifm_values(const CircleNode *node, float &ci_min, float &ci_max)
+{
+  for (const auto &pred : loco::preds(node))
+  {
+    const auto producer = loco::must_cast<const luci::CircleNode *>(pred);
+    if (!has_min_max(producer))
+      continue;
+
+    const auto qparam = producer->quantparam();
+    if (qparam->min.empty())
+      continue;
+
+    ci_min = qparam->min[0];
+    ci_max = qparam->max[0];
+  }
+}
+
+/**
+ * @brief Return upper bound of quantization error for CONV, DCONV, TCONV.
+ *
+ * The estimate is
+ *   (sum of per-channel weight ranges * max(|A_max|, |A_min|)
+ *    + max |weight| * number of channels * |A_max - A_min|) * additions per channel / 1e6
+ *
+ * See
+ * https://github.com/Samsung/ONE/pull/10170#discussion_r1065371638 for details.
+ */
+float approximate_conv(const CircleNode *node)
+{
+  // activation min-max values; both stay 0 when no predecessor has them recorded
+  float ci_min = 0.f;
+  float ci_max = 0.f;
+  get_min_max_ifm_values(node, ci_min, ci_max);
+
+  // channel-wise min, max of the weights
+  std::vector<float> min_values;
+  std::vector<float> max_values;
+  cal_minmax_per_channel(node, min_values, max_values);
+  assert(not min_values.empty());
+  assert(not max_values.empty());
+  assert(max_values.size() == min_values.size());
+
+  const uint32_t num_of_channels = max_values.size();
+
+  // w_max   - maximal absolute weight value across all channels
+  // sum_err - total weight quantization error across all channels: maximal error of
+  //           quantization is ~ (max_value - min_value) / 255, and omitting the 255 term
+  //           it is just the range of the channel
+  float w_max = 0;
+  float sum_err = 0.f;
+  for (size_t ch = 0; ch < max_values.size(); ++ch)
+  {
+    w_max = std::max(w_max, std::abs(max_values[ch]));
+    w_max = std::max(w_max, std::abs(min_values[ch]));
+
+    const float range = max_values[ch] - min_values[ch];
+    sum_err += range;
+  }
+
+  const uint32_t adds_per_channel = get_additions_per_channel(node);
+
+  // maximal error introduced by weights quantization (for all channels)
+  float volume_W_A_err = sum_err * std::max(::fabs(ci_max), ::fabs(ci_min));
+  // plus total error introduced by activation quantization (for all channels)
+  volume_W_A_err += w_max * num_of_channels * ::fabs(ci_max - ci_min);
+  // scale by volume of adds per channel
+  volume_W_A_err *= adds_per_channel;
+  // scale to get more readable output values
+  volume_W_A_err /= 1.e+6f;
+
+  return volume_W_A_err;
+}
+
+} // namespace
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+/**
+ * How does approximate() work?
+ *
+ * Currently only convolution layers are handled, but the idea may be generalized to other
+ * types as well (see https://github.com/Samsung/ONE/pull/10170#discussion_r1042246598).
+ *
+ * A convolution can be expressed as a matrix multiplication. Quantization introduces an error
+ * into the operand (activations A of the previous layer, i.e. IFM) as well as into the weights W:
+ *
+ *   A_q * W_q = (A + q_err(A)) * (W + q_err(W))
+ *             = A * W + A * q_err(W) + W * q_err(A) + q_err(A) * q_err(W)
+ *
+ * where q_err is the quantization error. Neglecting the quadratic term q_err(A) * q_err(W),
+ *
+ *   A_q * W_q ~ A * W + A * q_err(W) + W * q_err(A)
+ *
+ * so the quantization error of the multiplication is approximately
+ * A * q_err(W) + W * q_err(A), whose upper bound is
+ *
+ *   number_of_additions * (A_max * (W_max - W_min) / 255 + W_max * (A_max - A_min) / 255)
+ *
+ * The code estimates this total error for quantizing a convolution node into Q8.
+ * It's just a heuristic (metric sensitivity depends highly on derivatives as well).
+ *
+ * Nodes of any other kind yield 0.
+ */
+float approximate(const CircleNode *node)
+{
+  switch (node->opcode())
+  {
+    case CircleOpcode::DEPTHWISE_CONV_2D:
+    case CircleOpcode::CONV_2D:
+    case CircleOpcode::TRANSPOSE_CONV:
+      return approximate_conv(node);
+    default: // TODO (FULLY_CONNECTED e.g.)
+      return 0.f;
+  }
+}
+
+} // namespace bisection
+} // namespace mpqsolver
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_ERROR_APPROXIMATOR_H__
+#define __MPQSOLVER_BISECTION_ERROR_APPROXIMATOR_H__
+
+#include <loco.h>
+
+#include <luci/IR/CircleNodeDecl.h>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+/**
+ * @brief approximate error introduced while quantizing node into Q8
+ */
+float approximate(const luci::CircleNode *node);
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif // __MPQSOLVER_BISECTION_ERROR_APPROXIMATOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "ErrorApproximator.h"
+#include "TestHelper.h"
+
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <cmath>
+
+namespace
+{
+
+// Row-major linear offset of the 4-D position 'indices' inside a tensor of the given 'shape'
+// (Horner form of i0*s1*s2*s3 + i1*s2*s3 + i2*s3 + i3)
+inline uint32_t cal_offset(uint32_t shape[4], uint32_t *indices)
+{
+  uint32_t offset = indices[0];
+  for (uint32_t axis = 1; axis < 4; ++axis)
+  {
+    offset = offset * shape[axis] + indices[axis];
+  }
+  return offset;
+}
+
+/**
+ * @brief SimpleGraph with a single FLOAT32 Conv2D whose filter alternates between _w_max and
+ *        _w_min and whose input carries [_a_min, _a_max] as recorded min/max
+ */
+class NConvGraph final : public SimpleGraph
+{
+protected:
+  // Mark the shape of the graph input as valid and attach per-channel min/max to it
+  void initInput(loco::Node *input) override
+  {
+    auto act_qparam = std::make_unique<luci::CircleQuantParam>();
+    act_qparam->min.assign(_channel_size, _a_min);
+    act_qparam->max.assign(_channel_size, _a_max);
+
+    auto ci_input = loco::must_cast<luci::CircleNode *>(input);
+    ci_input->shape_status(luci::ShapeStatus::VALID);
+    ci_input->quantparam(std::move(act_qparam));
+  }
+
+  // Create filter (with data), bias and convolution; returns the convolution
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    auto node_pool = _g->nodes();
+
+    _filter = node_pool->create<luci::CircleConst>();
+    _filter->name("conv_filter");
+    _filter->dtype(loco::DataType::FLOAT32);
+    _filter->shape({_channel_size, _f_w, _f_h, _channel_size});
+    _filter->shape_status(luci::ShapeStatus::VALID);
+
+    uint32_t w_shape[4] = {_filter->dim(0).value(), _filter->dim(1).value(),
+                           _filter->dim(2).value(), _filter->dim(3).value()};
+
+    _filter->size<loco::DataType::FLOAT32>(w_shape[0] * w_shape[1] * w_shape[2] * w_shape[3]);
+
+    // elements at even linear offsets get _w_max, those at odd offsets get _w_min
+    uint32_t pos[4] = {0, 0, 0, 0};
+    for (pos[0] = 0; pos[0] < w_shape[0]; ++pos[0])
+      for (pos[1] = 0; pos[1] < w_shape[1]; ++pos[1])
+        for (pos[2] = 0; pos[2] < w_shape[2]; ++pos[2])
+          for (pos[3] = 0; pos[3] < w_shape[3]; ++pos[3])
+          {
+            const uint32_t offset = cal_offset(w_shape, pos);
+            const bool is_even = (offset % 2 == 0);
+            _filter->at<loco::DataType::FLOAT32>(offset) = is_even ? _w_max : _w_min;
+          }
+
+    _bias = node_pool->create<luci::CircleConst>();
+    _bias->name("conv_bias");
+    _bias->dtype(loco::DataType::FLOAT32);
+    _bias->shape({_channel_size});
+
+    _conv = node_pool->create<luci::CircleConv2D>();
+    _conv->name("conv");
+    _conv->dtype(loco::DataType::FLOAT32);
+    _conv->shape({1, _width, _height, _channel_size});
+    _conv->shape_status(luci::ShapeStatus::VALID);
+    _conv->padding(luci::Padding::SAME);
+    _conv->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _conv->input(input);
+    _conv->filter(_filter);
+    _conv->bias(_bias);
+
+    return _conv;
+  }
+
+public:
+  // nodes created by insertGraphBody
+  luci::CircleConv2D *_conv = nullptr;
+  luci::CircleConst *_filter = nullptr;
+  luci::CircleConst *_bias = nullptr;
+  // kernel width / height
+  uint32_t _f_w = 1;
+  uint32_t _f_h = 1;
+  // weight value range
+  float _w_min = -1.f;
+  float _w_max = 1.f;
+  // activation value range
+  float _a_min = -1.f;
+  float _a_max = 1.f;
+};
+
+} // namespace
+
+// approximate() on the convolution must match the closed-form bound built from graph parameters
+TEST(CircleMPQSolverErrorApproximatorTest, verifyResultsTest)
+{
+  NConvGraph g;
+  g.init();
+
+  // error caused by weight quantization, scaled by the largest activation bound
+  const auto weight_term = (g._w_max - g._w_min) * g._channel_size * std::max(g._a_max, g._a_min);
+  // error caused by activation quantization, scaled by the largest weight bound
+  const auto act_term = (g._a_max - g._a_min) * g._channel_size * std::max(g._w_max, g._w_min);
+
+  float expected =
+    (weight_term + act_term) * g._f_h * g._f_w * g._height * g._width * g._channel_size / 1.e+6f;
+
+  auto value = mpqsolver::bisection::approximate(g._conv);
+  EXPECT_FLOAT_EQ(expected, value);
+}
+
+TEST(CircleMPQSolverErrorApproximatorTest, verifyResultsTest_NEG)
+{
+ NConvGraph g;
+ g.init();
+
+ auto value = mpqsolver::bisection::approximate(g._input);
+ float expected = 0.f;
+ EXPECT_FLOAT_EQ(expected, value);
+
+ value = mpqsolver::bisection::approximate(g._output);
+ expected = 0.f;
+ EXPECT_FLOAT_EQ(expected, value);
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ErrorMetric.h"
+
+#include <loco/IR/DataType.h>
+#include <loco/IR/DataTypeTraits.h>
+
+#include <cmath>
+#include <cassert>
+
+using namespace mpqsolver::bisection;
+
+/**
+ * @brief compare first and second operands in MAE (Mean Absolute Error metric)
+ *
+ * Both operands are indexed as [sample][output] and every buffer is read as an array of
+ * FLOAT32 values. The result is the sum of absolute element differences divided by the total
+ * number of compared elements.
+ *
+ * @return mean absolute error; 0 when there is no element to compare
+ */
+float MAEMetric::compute(const WholeOutput &first, const WholeOutput &second) const
+{
+  assert(first.size() == second.size());
+
+  float error = 0.f;
+  size_t output_size = 0;
+
+  for (size_t sample_index = 0; sample_index < first.size(); ++sample_index)
+  {
+    assert(first[sample_index].size() == second[sample_index].size());
+    for (size_t out_index = 0; out_index < first[sample_index].size(); ++out_index)
+    {
+      const Buffer &first_elementary = first[sample_index][out_index];
+      const Buffer &second_elementary = second[sample_index][out_index];
+      assert(first_elementary.size() == second_elementary.size());
+      // buffers hold raw bytes: convert byte count into number of floats
+      size_t cur_size = first_elementary.size() / loco::size(loco::DataType::FLOAT32);
+
+      const float *first_floats = reinterpret_cast<const float *>(first_elementary.data());
+      const float *second_floats = reinterpret_cast<const float *>(second_elementary.data());
+      for (size_t index = 0; index < cur_size; index++)
+      {
+        float ref_value = *(first_floats + index);
+        float cur_value = *(second_floats + index);
+        error += std::fabs(ref_value - cur_value);
+      }
+      output_size += cur_size;
+    }
+  }
+
+  // Nothing was compared (no samples, no outputs or only empty buffers): the operands do not
+  // differ, so report zero instead of the NaN that 0 / 0 would produce
+  if (output_size == 0)
+    return 0.f;
+
+  return error / output_size;
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_ERROR_METRIC_H__
+#define __MPQSOLVER_BISECTION_ERROR_METRIC_H__
+
+#include <vector>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+using Buffer = std::vector<char>;
+using Output = std::vector<Buffer>;
+using WholeOutput = std::vector<Output>;
+
+/**
+ * @brief interface of metrics that reduce the difference of two whole-dataset outputs to a
+ *        single float
+ */
+class ErrorMetric
+{
+public:
+  virtual ~ErrorMetric() = default;
+
+  /**
+   * @brief compare 'first' and 'second' and return the value of the metric
+   *        (pure virtual: every concrete metric provides its own implementation)
+   */
+  virtual float compute(const WholeOutput &first, const WholeOutput &second) const = 0;
+};
+
+// Mean Absolute Error
+class MAEMetric final : public ErrorMetric
+{
+public:
+  /**
+   * @brief compare first and second operands in MAE (Mean Absolute Error metric)
+   */
+  // 'override' makes the compiler verify that this really overrides ErrorMetric::compute
+  float compute(const WholeOutput &first, const WholeOutput &second) const override;
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_BISECTION_ERROR_METRIC_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Evaluator.h"
+
+#include <luci_interpreter/Interpreter.h>
+
+#include <dio_hdf5/HDF5Importer.h>
+
+using namespace mpqsolver::bisection;
+
+using Shape = std::vector<loco::Dimension>;
+
+namespace
+{
+
+using namespace luci;
+
+/**
+ * @brief size of the tensor of 'node' in bytes: element size of its dtype multiplied by all
+ *        of its dimensions
+ */
+template <typename NodeT> size_t get_tensor_size(const NodeT *node)
+{
+  // accumulate in size_t (the return type) so that large tensors do not wrap around at 32 bits
+  size_t tensor_size = loco::size(node->dtype());
+  for (uint32_t i = 0; i < node->rank(); ++i)
+    tensor_size *= node->dim(i).value();
+  return tensor_size;
+}
+
+/**
+ * @brief run 'module' on every record of 'h5file' and collect raw output buffers
+ *
+ * The result is indexed as [record][output]; every buffer holds the bytes read back from the
+ * interpreter for that output.
+ *
+ * @throws std::runtime_error when the file has no record or a record has a number of inputs
+ *         different from the number of graph inputs
+ */
+WholeOutput compute_outputs(const luci::Module *module, const std::string &h5file)
+{
+  dio::hdf5::HDF5Importer importer{h5file};
+  importer.importGroup("value");
+
+  bool is_raw_data = importer.isRawData();
+
+  const auto num_records = importer.numData();
+  if (num_records == 0)
+    throw std::runtime_error("The input data file does not contain any record.");
+  const auto input_nodes = loco::input_nodes(module->graph());
+  const auto num_inputs = input_nodes.size();
+
+  // The module is only accessed through a const pointer here, so the set of output nodes is
+  // the same for every record: query it once instead of once per record
+  const auto output_nodes = loco::output_nodes(module->graph());
+  const auto num_outputs = module->graph()->outputs()->size();
+
+  WholeOutput dataset_output;
+
+  // Create interpreter.
+  luci_interpreter::Interpreter interpreter(module);
+  for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
+  {
+    if (num_inputs != static_cast<uint32_t>(importer.numInputs(record_idx)))
+      throw std::runtime_error("Wrong number of inputs.");
+    for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+    {
+      const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+      assert(input_node->index() == input_idx);
+
+      std::vector<char> input_data(get_tensor_size(input_node));
+
+      if (!is_raw_data)
+      {
+        loco::DataType dtype;
+        Shape shape;
+        importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data());
+      }
+      else
+      {
+        // Skip type/shape check for raw data
+        importer.readTensor(record_idx, input_idx, input_data.data());
+      }
+
+      interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+    }
+
+    interpreter.interpret();
+
+    // Get output: buffers are created directly inside the result so that no output data has
+    // to be copied afterwards
+    dataset_output.emplace_back();
+    Output &nn_output = dataset_output.back();
+    nn_output.reserve(num_outputs);
+    for (size_t i = 0; i < num_outputs; i++)
+    {
+      const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+      nn_output.emplace_back(get_tensor_size(output_node));
+      Buffer &output_data = nn_output.back();
+      interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+    }
+  }
+
+  return dataset_output;
+}
+
+} // namespace
+
+// Remembers the reference module, the dataset file and the address of 'metric', and
+// immediately computes the reference outputs of 'ref_module' on the dataset.
+// Members are initialized in their declaration order, so _ref_module and _h5file are already
+// set when the reference outputs are computed.
+DatasetEvaluator::DatasetEvaluator(const luci::Module *ref_module, const std::string &h5file,
+                                   const ErrorMetric &metric)
+  : _ref_module(ref_module), _h5file(h5file),
+    _ref_output(compute_outputs(_ref_module, _h5file)), _metric(&metric)
+{
+  // DO NOTHING
+}
+
+// Throws std::runtime_error unless every graph output of 'trgt_fq_module' is FLOAT32
+void DatasetEvaluator::validate(const luci::Module *trgt_fq_module) const
+{
+  for (const auto node : loco::output_nodes(trgt_fq_module->graph()))
+  {
+    const auto *output_node = loco::must_cast<const luci::CircleOutput *>(node);
+    if (output_node->dtype() == loco::DataType::FLOAT32)
+      continue;
+
+    throw std::runtime_error("Unsupported output dtype " + output_node->name());
+  }
+}
+
+// Runs 'trgt_fq_module' on the dataset and returns the metric between the reference outputs
+// and the outputs just computed. Throws std::runtime_error when the module or the metric is
+// missing, or when the module does not pass validate().
+float DatasetEvaluator::evaluate(const luci::Module *trgt_fq_module) const
+{
+  if (trgt_fq_module == nullptr)
+  {
+    throw std::runtime_error("Invalid target module");
+  }
+
+  if (_metric == nullptr)
+  {
+    throw std::runtime_error("Invalid metric");
+  }
+
+  validate(trgt_fq_module);
+
+  const WholeOutput &target_output = compute_outputs(trgt_fq_module, _h5file);
+  return _metric->compute(_ref_output, target_output);
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_EVALUATOR_H__
+#define __MPQSOLVER_BISECTION_EVALUATOR_H__
+
+#include "ErrorMetric.h"
+
+#include <luci/IR/Module.h>
+#include <luci/CircleQuantizer.h>
+
+#include <string>
+#include <vector>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+/**
+ * @brief compares outputs of a target module against outputs of a reference module, both
+ *        computed on the same h5 dataset
+ */
+class DatasetEvaluator final
+{
+public:
+  /**
+   * @brief create Evaluator for comparing output of ref_module on h5file
+   * @note  only the address of 'metric' is stored
+   */
+  DatasetEvaluator(const luci::Module *ref_module, const std::string &h5file,
+                   const ErrorMetric &metric);
+  DatasetEvaluator() = delete;
+  ~DatasetEvaluator() = default;
+
+  /**
+   * @brief evaluate trgt_fq_module (fake-quantized)
+   * @return value of the error metric between reference and target outputs
+   */
+  float evaluate(const luci::Module *trgt_fq_module) const;
+
+private:
+  /**
+   * @brief throws if there is something wrong with the module
+   */
+  void validate(const luci::Module *module) const;
+
+private:
+  const luci::Module *_ref_module = nullptr; // reference module
+  std::string _h5file;                       // path of the dataset
+  WholeOutput _ref_output;                   // outputs of the reference module on the dataset
+  const ErrorMetric *_metric = nullptr;      // metric used by evaluate()
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_BISECTION_EVALUATOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Quantizer.h"
+#include <luci/Service/Validate.h>
+
+#include <iostream>
+
+using namespace mpqsolver::bisection;
+using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters;
+using Algorithms = luci::CircleQuantizer::Options::Algorithm;
+
+namespace
+{
+
+// Apply the ConvertToFakeQuantizedModel algorithm to every graph of 'module'.
+// Stops and returns false at the first graph that fails validation afterwards.
+bool make_model_fake_quantized(luci::Module *module)
+{
+  luci::CircleQuantizer quantizer;
+  quantizer.options()->enable(Algorithms::ConvertToFakeQuantizedModel);
+
+  const size_t graph_count = module->size();
+  for (size_t idx = 0; idx != graph_count; ++idx)
+  {
+    auto graph = module->graph(idx);
+    quantizer.quantize(graph);
+
+    const bool is_valid = luci::validate(graph);
+    if (!is_valid)
+      return false;
+  }
+
+  return true;
+}
+
+} // namespace
+
+// Stores the dtypes that quantize() later passes as Quantize_input_type / Quantize_output_type
+Quantizer::Quantizer(const std::string &input_dtype, const std::string &output_dtype)
+  : _input_dtype(input_dtype),
+    _output_dtype(output_dtype)
+{
+  // DO NOTHING
+}
+
+/**
+ * @brief quantize recorded module (min/max initialized) with specified parameters
+ *
+ * Every graph of the module is quantized with QuantizeWithMinMax ("float32" input model,
+ * 'quant_dtype' output model, "channel" granularity) and validated afterwards.
+ * Non-empty 'layer_params' are forwarded as layer-wise parameters.
+ *
+ * @return true on success; false when 'module' is null, when the layer parameters are
+ *         rejected, or when a quantized graph is invalid
+ */
+bool Quantizer::quantize(luci::Module *module, const std::string &quant_dtype,
+                         LayerParams &layer_params)
+{
+  if (module == nullptr)
+  {
+    return false;
+  }
+
+  static const std::string default_dtype = "float32";
+  static const std::string granularity_type = "channel";
+
+  luci::CircleQuantizer quantizer;
+  auto opts = quantizer.options();
+
+  opts->enable(Algorithms::QuantizeWithMinMax);
+
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, default_dtype);
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, quant_dtype);
+  opts->param(AlgorithmParameters::Quantize_granularity, granularity_type);
+  opts->param(AlgorithmParameters::Quantize_input_type, _input_dtype);
+  opts->param(AlgorithmParameters::Quantize_output_type, _output_dtype);
+  opts->param(AlgorithmParameters::Quantize_TF_style_maxpool, "False");
+
+  const bool has_layer_params = !layer_params.empty();
+  if (has_layer_params)
+  {
+    try
+    {
+      opts->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
+    }
+    catch (const std::runtime_error &e)
+    {
+      // rejected layer parameters are reported and turned into a failure status
+      std::cerr << e.what() << '\n';
+      return false;
+    }
+  }
+
+  const size_t graph_count = module->size();
+  for (size_t idx = 0; idx != graph_count; ++idx)
+  {
+    auto graph = module->graph(idx);
+    quantizer.quantize(graph);
+
+    if (luci::validate(graph))
+      continue;
+
+    std::cerr << "ERROR: Quantized graph is invalid" << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+/**
+ * @brief quantize recorded module (min/max initialized) with specified parameters and
+ *        then convert it into a fake-quantized model
+ *        returns true only when both steps succeed
+ */
+bool Quantizer::fake_quantize(luci::Module *module, const std::string &quant_dtype,
+                              LayerParams &layer_params)
+{
+  // short-circuit evaluation: the conversion runs only after a successful quantize()
+  return quantize(module, quant_dtype, layer_params) && make_model_fake_quantized(module);
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_QUANTIZER_H__
+#define __MPQSOLVER_BISECTION_QUANTIZER_H__
+
+#include <luci/IR/Module.h>
+#include <luci/CircleQuantizer.h>
+
+#include <string>
+#include <vector>
+#include <memory>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+using LayerParam = luci::CircleQuantizer::Options::LayerParam;
+using LayerParams = std::vector<std::shared_ptr<LayerParam>>;
+
+/**
+ * @brief Quantizes luci modules using the input/output dtypes given at construction
+ */
+class Quantizer
+{
+public:
+  /**
+   * @brief construct with the dtypes to request for model inputs and outputs
+   */
+  Quantizer(const std::string &input_dtype, const std::string &output_dtype);
+
+  /**
+   * @brief quantize recorded module (min/max initialized) with specified parameters
+   *        returns true on success
+   */
+  bool quantize(luci::Module *module, const std::string &quant_dtype, LayerParams &layer_params);
+
+  /**
+   * @brief fake_quantize recorded module (min/max initialized) with specified parameters
+   *        returns true on success
+   */
+  bool fake_quantize(luci::Module *module, const std::string &quant_dtype,
+                     LayerParams &layer_params);
+
+private:
+  // The in-class defaults are replaced by the constructor arguments
+  std::string _input_dtype = "uint8";
+  std::string _output_dtype = "uint8";
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif //__MPQSOLVER_BISECTION_QUANTIZER_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <gtest/gtest.h>
+
+#include "Quantizer.h"
+#include "TestHelper.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <cmath>
+
+namespace
+{
+
+/**
+ * @brief Test graph: input -> Add(input, beta) -> output, with beta a FLOAT32 constant
+ *
+ * Min/max records (_a_min, _a_max) are attached to the input and to the Add node.
+ */
+class AddGraph final : public SimpleGraph
+{
+protected:
+  void initInput(loco::Node *input) override
+  {
+    auto ci_input = loco::must_cast<luci::CircleNode *>(input);
+    initMinMax(ci_input);
+  }
+
+  // Attach a quantparam holding a single min and a single max value to 'node'
+  void initMinMax(luci::CircleNode *node)
+  {
+    auto qparam = std::make_unique<luci::CircleQuantParam>();
+    qparam->min.assign(1, _a_min);
+    qparam->max.assign(1, _a_max);
+    node->quantparam(std::move(qparam));
+  }
+
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    _add = _g->nodes()->create<luci::CircleAdd>();
+    _beta = _g->nodes()->create<luci::CircleConst>();
+
+    _add->dtype(loco::DataType::FLOAT32);
+    _beta->dtype(loco::DataType::FLOAT32);
+
+    _add->shape({1, _channel_size, _width, _height});
+    _beta->shape({1, _channel_size, _width, _height});
+
+    // The constant buffer holds one element per entry of the shape declared above
+    _beta->size<loco::DataType::FLOAT32>(_channel_size * _width * _height);
+    _add->x(input);
+    _add->y(_beta);
+    _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+    _add->name("add");
+    _beta->name("beta");
+    initMinMax(_add);
+
+    return _add;
+  }
+
+public:
+  float _a_min = -1.f;
+  float _a_max = 1.f;
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleConst *_beta = nullptr;
+};
+
+} // namespace
+
+// Quantize the Add graph to uint8 and check the scale / zero point derived from the
+// recorded [-1, 1] range of the Add node.
+TEST(CircleMPQSolverQuantizerTest, verifyResultsTest)
+{
+  auto m = luci::make_module();
+  AddGraph g;
+  g.init();
+  auto add = g._add;
+  float range = g._a_max - g._a_min;
+  g.transfer_to(m.get());
+
+  std::string def_quant = "uint8";
+  mpqsolver::bisection::Quantizer quantizer(def_quant, def_quant);
+  mpqsolver::bisection::LayerParams params;
+  auto res = quantizer.quantize(m.get(), def_quant, params);
+  // Fatal assertions: the checks below dereference and index the results
+  ASSERT_TRUE(res);
+  auto quant_param = add->quantparam();
+  ASSERT_NE(nullptr, quant_param);
+  ASSERT_EQ(1u, quant_param->scale.size());
+  EXPECT_FLOAT_EQ(quant_param->scale[0], range / 255.f);
+  ASSERT_EQ(1u, quant_param->zerop.size());
+  EXPECT_EQ(128, quant_param->zerop[0]);
+}
+
+// Negative case: quantize() must report failure for a null module.
+TEST(CircleMPQSolverQuantizerTest, verifyResultsTest_NEG)
+{
+  const std::string dtype = "uint8";
+  mpqsolver::bisection::LayerParams no_layer_params;
+  mpqsolver::bisection::Quantizer quantizer(dtype, dtype);
+
+  const bool quantized = quantizer.quantize(nullptr, dtype, no_layer_params);
+  EXPECT_FALSE(quantized);
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_TEST_HELPER_H__
+#define __MPQSOLVER_TEST_HELPER_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/Module.h>
+
+/**
+ * @brief Base helper for unit tests: builds a graph of the form input -> body -> output
+ *
+ * Derived classes supply the body through insertGraphBody() and may adjust the input
+ * node through initInput(). All four nodes/ports are FLOAT32 with shape
+ * {1, _channel_size, _width, _height}.
+ */
+class SimpleGraph
+{
+public:
+  SimpleGraph() : _g(loco::make_graph()) {}
+
+public:
+  // Create input/output nodes and graph ports, then insert the body between them
+  void init()
+  {
+    _input = _g->nodes()->create<luci::CircleInput>();
+    _output = _g->nodes()->create<luci::CircleOutput>();
+    _input->name("input");
+    _output->name("output");
+
+    auto graph_input = _g->inputs()->create();
+    _input->index(graph_input->index());
+    auto graph_output = _g->outputs()->create();
+    _output->index(graph_output->index());
+
+    graph_input->dtype(loco::DataType::FLOAT32);
+    _input->dtype(loco::DataType::FLOAT32);
+    _output->dtype(loco::DataType::FLOAT32);
+    graph_output->dtype(loco::DataType::FLOAT32);
+
+    graph_input->shape({1, _channel_size, _width, _height});
+    _input->shape({1, _channel_size, _width, _height});
+    _output->shape({1, _channel_size, _width, _height});
+    graph_output->shape({1, _channel_size, _width, _height});
+
+    auto graph_body = insertGraphBody(_input);
+    _output->from(graph_body);
+
+    // runs after the body exists, so overrides see the fully connected graph
+    initInput(_input);
+  }
+
+  virtual ~SimpleGraph() = default;
+
+  void transfer_to(luci::Module *module)
+  {
+    // WARNING: after the graph is transferred, _g is moved-from and must not be used.
+    // _input and _output are not cleared as this is just a helper for unit tests.
+    module->add(std::move(_g));
+  }
+
+protected:
+  // Build the graph body fed by 'input' and return the node to connect to the output
+  virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+  // Optional hook for the input node; does nothing by default
+  virtual void initInput(loco::Node *) {}
+
+public:
+  std::unique_ptr<loco::Graph> _g;
+  luci::CircleInput *_input = nullptr;
+  luci::CircleOutput *_output = nullptr;
+  uint32_t _channel_size = 16;
+  uint32_t _width = 4;
+  uint32_t _height = 4;
+};
+
+#endif //__MPQSOLVER_TEST_HELPER_H__
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VISQErrorApproximator.h"
+
+#include <fstream>
+
+using namespace mpqsolver::bisection;
+
+/**
+ * @brief initialize from the file at visq_data_path
+ *
+ * @throws std::runtime_error when the file cannot be opened or its content is rejected
+ */
+void VISQErrorApproximator::init(const std::string &visq_data_path)
+{
+  // read file
+  std::ifstream file(visq_data_path);
+  // a stream that failed to open carries no data: report it instead of parsing nothing
+  if (!file.is_open() || !init(file))
+  {
+    throw std::runtime_error("Invalid visq file " + visq_data_path);
+  }
+}
+
+// Initialize from an already opened stream and report success.
+// NOTE: parsing is not implemented yet (see TODO below); the stream is ignored and
+// true is always returned.
+bool VISQErrorApproximator::init(std::istream &)
+{
+ // TODO
+ return true;
+}
+
+float VISQErrorApproximator::approximate(const std::string &node_name) const
+{
+ auto iter = _layer_errors.find(node_name);
+ if (iter == _layer_errors.end())
+ {
+ return 0.f;
+ }
+
+ return iter->second;
+}
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPQSOLVER_BISECTION_VISQ_ERROR_APPROXIMATOR_H__
+#define __MPQSOLVER_BISECTION_VISQ_ERROR_APPROXIMATOR_H__
+
+#include <string>
+#include <map>
+
+namespace mpqsolver
+{
+namespace bisection
+{
+
+/**
+ * @brief Provides a per-node error value looked up by node name
+ */
+class VISQErrorApproximator final
+{
+public:
+ /**
+ * @brief constructor of VISQErrorApproximator
+ */
+ VISQErrorApproximator() = default;
+
+ /**
+ * @brief initialize by visq_data_path (throws on failure)
+ */
+ void init(const std::string &visq_data_path);
+
+ /**
+ * @brief approximate error introduced while quantizing node into Q8
+ */
+ float approximate(const std::string &node_name) const;
+
+private:
+ /**
+ * @brief initialize by visq_data (returns success)
+ */
+ bool init(std::istream &visq_data);
+
+private:
+ // NOTE(review): _visq_data_path is not assigned by the implementation visible in this
+ // change - confirm whether it is still needed
+ std::string _visq_data_path;
+ // node name -> error value returned by approximate()
+ std::map<std::string, float> _layer_errors;
+};
+
+} // namespace bisection
+} // namespace mpqsolver
+
+#endif // __MPQSOLVER_BISECTION_VISQ_ERROR_APPROXIMATOR_H__
#include <fstream>
#include <vector>
-class cirlce_operator_test : public ::testing::Test
+class circle_operator_test : public ::testing::Test
{
protected:
bool initialize(void);
std::string _result;
};
-bool cirlce_operator_test::initialize(void)
+bool circle_operator_test::initialize(void)
{
char *path = std::getenv("ARTIFACTS_PATH");
if (path == nullptr)
return true;
}
-bool cirlce_operator_test::run(const std::string &command)
+bool circle_operator_test::run(const std::string &command)
{
std::vector<char> buffer(260);
std::string result = "";
return true;
}
-bool cirlce_operator_test::load(const std::string &file)
+bool circle_operator_test::load(const std::string &file)
{
std::ifstream tmp(file.c_str());
if (tmp.fail())
return true;
}
-TEST_F(cirlce_operator_test, valid_names)
+TEST_F(circle_operator_test, valid_names)
{
if (!initialize())
{
ASSERT_NE(std::string::npos, pos);
}
-TEST_F(cirlce_operator_test, valid_codes)
+TEST_F(circle_operator_test, valid_codes)
{
if (!initialize())
{
ASSERT_NE(std::string::npos, pos);
}
-TEST_F(cirlce_operator_test, invalid_option_NEG)
+TEST_F(circle_operator_test, invalid_option_NEG)
{
if (!initialize())
{
ASSERT_NE(std::string::npos, pos);
}
-TEST_F(cirlce_operator_test, check_code_name)
+TEST_F(circle_operator_test, check_code_name)
{
if (!initialize())
{
ASSERT_NE(std::string::npos, pos2);
}
-TEST_F(cirlce_operator_test, nonexist_file_NEG)
+TEST_F(circle_operator_test, nonexist_file_NEG)
{
if (!initialize())
{
ASSERT_NE(std::string::npos, pos);
}
-TEST_F(cirlce_operator_test, invalid_file_NEG)
+TEST_F(circle_operator_test, invalid_file_NEG)
{
if (!initialize())
{
ASSERT_NE(std::string::npos, pos);
}
-TEST_F(cirlce_operator_test, output_file)
+TEST_F(circle_operator_test, output_file)
{
if (!initialize())
{
target_link_libraries(circle-opselector arser)
target_link_libraries(circle-opselector vconone)
target_link_libraries(circle-opselector luci_service)
+target_link_libraries(circle-opselector luci_partition)
target_link_libraries(circle-opselector luci_profile)
install(TARGETS circle-opselector DESTINATION bin)
target_link_libraries(circle-opselector-test arser)
target_link_libraries(circle-opselector-test vconone)
target_link_libraries(circle-opselector-test luci_service)
+target_link_libraries(circle-opselector-test luci_partition)
target_link_libraries(circle-opselector-test luci_profile)
+target_link_libraries(circle-opselector-test luci_testhelper)
*/
#include "ModuleIO.h"
+#include "OpSelector.h"
+#include <luci/ConnectNode.h>
#include <luci/Profile/CircleNodeID.h>
+#include <luci/Service/CircleNodeClone.h>
#include <arser/arser.h>
#include <vconone/vconone.h>
std::cout << vconone::get_copyright() << std::endl;
}
-std::vector<std::string> split_into_vector(const std::string &str, const char &delim)
-{
- std::vector<std::string> ret;
- std::istringstream is(str);
- for (std::string item; std::getline(is, item, delim);)
- {
- ret.push_back(item);
- }
-
- // remove empty string
- ret.erase(std::remove_if(ret.begin(), ret.end(), [](const std::string &s) { return s.empty(); }),
- ret.end());
-
- return ret;
-}
-
-bool is_number(const std::string &s)
-{
- return !s.empty() && std::find_if(s.begin(), s.end(),
- [](unsigned char c) { return !std::isdigit(c); }) == s.end();
-}
-
-bool is_number(const std::vector<std::string> &vec)
-{
- for (const auto &s : vec)
- {
- if (not::is_number(s))
- {
- return false;
- }
- }
- return true;
-}
-
-/**
- * @brief Segmentation function for user's '--by_id' input
- *
- * @note This function tokenizes the input data.s
- * First, divide it into ',', and if token has '-', devide it once more into '-'.
- * For example, if user input is '12,34,56', it is devided into [12,34,56].
- * If input is '1-2,34,56', it is devided into [[1,2],34,56].
- * And '-' means range so, if input is '2-7', it means all integer between 2-7.
- */
-std::vector<uint32_t> split_id_input(const std::string &str)
-{
- std::vector<uint32_t> by_id;
-
- // tokenize colon-separated string
- auto colon_tokens = ::split_into_vector(str, ',');
- if (colon_tokens.empty()) // input empty line like "".
- {
- std::cerr << "ERROR: Nothing was entered." << std::endl;
- exit(EXIT_FAILURE);
- }
- for (const auto &ctok : colon_tokens)
- {
- auto dash_tokens = ::split_into_vector(ctok, '-');
- if (not::is_number(dash_tokens))
- {
- std::cerr << "ERROR: To select operator by id, please use these args: [0-9], '-', ','"
- << std::endl;
- exit(EXIT_FAILURE);
- }
- // convert string into integer
- std::vector<uint32_t> int_tokens;
- try
- {
- std::transform(dash_tokens.begin(), dash_tokens.end(), std::back_inserter(int_tokens),
- [](const std::string &str) { return static_cast<uint32_t>(std::stoi(str)); });
- }
- catch (const std::out_of_range &)
- {
- // if input is big integer like '123467891234', stoi throw this exception.
- std::cerr << "ERROR: Argument is out of range." << std::endl;
- exit(EXIT_FAILURE);
- }
- catch (...)
- {
- std::cerr << "ERROR: Unknown error" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- switch (int_tokens.size())
- {
- case 0: // inputs like "-"
- {
- std::cerr << "ERROR: Nothing was entered" << std::endl;
- exit(EXIT_FAILURE);
- }
- case 1: // inputs like "1", "2"
- {
- by_id.push_back(int_tokens.at(0));
- break;
- }
- case 2: // inputs like "1-2", "11-50"
- {
- for (uint32_t i = int_tokens.at(0); i <= int_tokens.at(1); i++)
- {
- by_id.push_back(i);
- }
- break;
- }
- default: // inputs like "1-2-3"
- {
- std::cerr << "ERROR: Too many '-' in str." << std::endl;
- exit(EXIT_FAILURE);
- }
- }
- }
-
- return by_id;
-}
-
-std::vector<std::string> split_name_input(const std::string &str)
-{
- return ::split_into_vector(str, ',');
-}
-
int entry(int argc, char **argv)
{
// TODO Add new option names!
std::string input_path = arser.get<std::string>("input");
std::string output_path = arser.get<std::string>("output");
- std::string operator_input;
-
- std::vector<uint32_t> by_id;
- std::vector<std::string> by_name;
-
if (!arser["--by_id"] && !arser["--by_name"] || arser["--by_id"] && arser["--by_name"])
{
std::cerr << "ERROR: Either option '--by_id' or '--by_name' must be specified" << std::endl;
return EXIT_FAILURE;
}
- if (arser["--by_id"])
- {
- operator_input = arser.get<std::string>("--by_id");
- by_id = split_id_input(operator_input);
- }
- if (arser["--by_name"])
- {
- operator_input = arser.get<std::string>("--by_name");
- by_name = split_name_input(operator_input);
- }
-
// Import original circle file.
auto module = opselector::getModule(input_path);
- // Select nodes from user input.
- std::vector<const luci::CircleNode *> selected_nodes;
-
- // put selected nodes into vector.
- if (by_id.size())
+ // TODO support two or more subgraphs
+ if (module.get()->size() != 1)
{
- loco::Graph *graph = module.get()->graph(0); // get main subgraph.
+ std::cerr << "ERROR: Not support two or more subgraphs" << std::endl;
+ return EXIT_FAILURE;
+ }
- for (auto node : loco::all_nodes(graph))
- {
- auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+ opselector::OpSelector op_selector{module.get()};
- try
- {
- auto node_id = luci::get_node_id(cnode); // if the node is not operator, throw runtime_error
+ std::unique_ptr<luci::Module> new_module;
+ std::string operator_input;
- for (auto selected_id : by_id)
- if (selected_id == node_id) // find the selected id
- selected_nodes.emplace_back(cnode);
- }
- catch (std::runtime_error)
- {
- continue;
- }
- }
- }
- if (by_name.size())
+ if (arser["--by_id"])
{
- loco::Graph *graph = module.get()->graph(0); // get main subgraph.
-
- for (auto node : loco::all_nodes(graph))
- {
- auto cnode = loco::must_cast<const luci::CircleNode *>(node);
- std::string node_name = cnode->name();
-
- for (auto selected_name : by_name)
- if (selected_name.compare(node_name) == 0) // find the selected name
- selected_nodes.emplace_back(cnode);
- }
+ operator_input = arser.get<std::string>("--by_id");
+ new_module = op_selector.select_by<opselector::SelectType::ID>(operator_input);
}
- if (selected_nodes.size() == 0)
+ if (arser["--by_name"])
{
- std::cerr << "ERROR: No operator selected" << std::endl;
- exit(EXIT_FAILURE);
+ operator_input = arser.get<std::string>("--by_name");
+ new_module = op_selector.select_by<opselector::SelectType::NAME>(operator_input);
}
- // TODO implement node selections
- // Export to output Circle file
- assert(opselector::exportModule(module.get(), output_path));
+  // Export to output Circle file.
+  // The call must not be wrapped in assert(): with NDEBUG the whole expression is
+  // removed and no output file would be written.
+  if (!opselector::exportModule(new_module.get(), output_path))
+  {
+    std::cerr << "ERROR: Cannot export to " << output_path << std::endl;
+    return EXIT_FAILURE;
+  }
return 0;
}
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Driver.test.h"
-#include "TestHelper.h"
-
-#include <gtest/gtest.h>
-
-TEST(DriverTest, NoArg_NEG)
-{
- Argv<1> argv;
- argv.add("circle-opselector");
-
- ::testing::internal::CaptureStderr();
- ::testing::internal::CaptureStdout();
- int result = entry(1, argv.argv());
- ::testing::internal::GetCapturedStdout();
- ASSERT_EQ(EXIT_FAILURE, result);
-}
-
-TEST(DriverTest, Wrong_ID_NEG)
-{
- std::string str1 = "1";
- std::string empty = "";
- std::string no_integer = "1531538X5";
-
- ASSERT_EQ(true, is_number(str1));
- ASSERT_EQ(false, is_number(empty));
- ASSERT_EQ(false, is_number(no_integer));
-}
-
-TEST(DriverTest, Split)
-{
- std::vector<uint32_t> vec1;
- std::vector<uint32_t> vec2;
-
- std::string hyphen = "1-3,8-10";
- std::string comma = "1,2,3";
-
- vec1.push_back(1);
- vec1.push_back(2);
- vec1.push_back(3);
- vec1.push_back(8);
- vec1.push_back(9);
- vec1.push_back(10);
-
- vec2.push_back(1);
- vec2.push_back(2);
- vec2.push_back(3);
-
- ASSERT_EQ(vec1, split_id_input(hyphen));
- ASSERT_EQ(vec2, split_id_input(comma));
-}
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
-#define __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
-
-#include <vector>
-#include <string>
-
-int entry(int argc, char **argv);
-bool is_number(const std::string &s);
-std::vector<uint32_t> split_id_input(const std::string &str);
-
-#endif // __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OpSelector.h"
+
+#include <luci/ConnectNode.h>
+#include <luci/Profile/CircleNodeID.h>
+#include <luci/Service/CircleNodeClone.h>
+
+#include <algorithm>
+#include <cassert>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+/**
+ * @brief Split given string at every 'delim' and drop empty pieces
+ *
+ * Assumes given string looks like below.
+ *
+ * - '1,2,5,7,9'
+ * - '1-5,6,7,9,12-14'
+ * - 'tensor_a,tensor_b,tensor_d'
+ *
+ * NOTE. 1-5 is same with '1,2,3,4,5'.
+ *
+ * WARNING. SelectType::NAME doesn't allow '-' like 'tensor_a-tensor_c'.
+ */
+std::vector<std::string> split_into_vector(const std::string &str, const char &delim)
+{
+  std::vector<std::string> tokens;
+  std::istringstream stream(str);
+  std::string piece;
+
+  while (std::getline(stream, piece, delim))
+  {
+    // empty pieces (e.g. from ",," or a leading delimiter) are never kept
+    if (not piece.empty())
+      tokens.push_back(piece);
+  }
+
+  return tokens;
+}
+
+// True iff 's' is non-empty and every character is a decimal digit.
+bool is_number(const std::string &s)
+{
+  if (s.empty())
+    return false;
+
+  return std::all_of(s.begin(), s.end(), [](unsigned char c) { return std::isdigit(c) != 0; });
+}
+
+// True iff every string of 'vec' is a number (an empty vector yields true).
+bool is_number(const std::vector<std::string> &vec)
+{
+  const auto first_bad = std::find_if(vec.begin(), vec.end(),
+                                      [](const std::string &s) { return not::is_number(s); });
+  return first_bad == vec.end();
+}
+
+// TODO Move this class into a separate header for reuse
+/**
+ * @brief Visitor that returns true for the operator types listed below and false for
+ *        every other CircleNode
+ *
+ * The listed types are the ones this file treats as multi-output operators.
+ */
+class IsMultiOutputNode final : public luci::CircleNodeVisitor<bool>
+{
+public:
+ bool visit(const luci::CircleCustom *) final { return true; }
+ bool visit(const luci::CircleIf *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV4 *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV5 *) final { return true; }
+ bool visit(const luci::CircleSplit *) final { return true; }
+ bool visit(const luci::CircleSplitV *) final { return true; }
+ bool visit(const luci::CircleTopKV2 *) final { return true; }
+ bool visit(const luci::CircleUnique *) final { return true; }
+ bool visit(const luci::CircleUnpack *) final { return true; }
+ bool visit(const luci::CircleWhile *) final { return true; }
+ // default is false
+ bool visit(const luci::CircleNode *) final { return false; }
+};
+
+/**
+ * @brief Build a stand-alone graph out of the selected nodes
+ *
+ * Every node of 'nodes' is cloned. An argument that is not part of 'nodes' is cloned
+ * when it is a CircleConst and replaced by a new CircleInput (graph input) otherwise.
+ * A selected node that has successors, none of which is selected, is exported through
+ * CircleOutput node(s) (graph outputs).
+ *
+ * @throws std::runtime_error when an argument or successor is not a luci::CircleNode,
+ *         or when a node to be exported has no clone
+ */
+std::unique_ptr<loco::Graph> make_graph(const std::vector<const luci::CircleNode *> &nodes)
+{
+  auto graph = loco::make_graph();
+
+  luci::CloneContext ctx;
+  // clone nodes
+  for (const auto &n : nodes)
+  {
+    auto clone = luci::clone_node(n, graph.get());
+    ctx.emplace(n, clone);
+  }
+  // set graph input
+  for (const auto &n : nodes)
+  {
+    for (uint32_t i = 0; i < n->arity(); i++)
+    {
+      auto arg = n->arg(i);
+      auto input_node = dynamic_cast<luci::CircleNode *>(arg);
+      auto ctx_it = ctx.find(input_node);
+      // check if the node already has been cloned
+      if (ctx_it != ctx.end())
+        continue;
+      // the node isn't graph input if it is an other node's input
+      if (std::find(nodes.begin(), nodes.end(), arg) != nodes.end())
+        continue;
+      auto circle_const = dynamic_cast<luci::CircleConst *>(arg);
+      if (circle_const != nullptr)
+      {
+        auto clone = luci::clone_node(circle_const, graph.get());
+        ctx.emplace(circle_const, clone);
+        continue;
+      }
+
+      // validate before creating anything in the new graph
+      if (not input_node)
+      {
+        throw std::runtime_error{"ERROR: Invalid graph"};
+      }
+      // circle input
+      auto circle_input = graph->nodes()->create<luci::CircleInput>();
+      luci::copy_common_attributes(input_node, circle_input);
+      ctx.emplace(input_node, circle_input);
+      // graph input
+      auto graph_input = graph->inputs()->create();
+      graph_input->name(circle_input->name());
+      graph_input->dtype(circle_input->dtype());
+      // graph input shape: copy every known dimension into the new TensorShape
+      auto input_shape = std::make_unique<loco::TensorShape>();
+      input_shape->rank(circle_input->rank());
+      for (uint32_t d = 0; d < circle_input->rank(); d++)
+      {
+        if (circle_input->dim(d).known())
+        {
+          input_shape->dim(d).set(circle_input->dim(d).value());
+        }
+      }
+      graph_input->shape(std::move(input_shape));
+
+      circle_input->index(graph_input->index());
+    }
+  }
+  // set graph output
+  for (auto &n : nodes)
+  {
+    auto outputs = loco::succs(n);
+    bool beingUsed = false;
+    for (const auto &o : outputs)
+    {
+      if (std::find(nodes.begin(), nodes.end(), o) != nodes.end())
+      {
+        beingUsed = true;
+        break;
+      }
+    }
+    // the node isn't graph output if it is an other node's output
+    if (beingUsed)
+      continue;
+
+    IsMultiOutputNode multiout_visitor;
+    bool isMultiOut = n->accept(&multiout_visitor);
+    for (auto &o : outputs)
+    {
+      const luci::CircleNode *output_node = nullptr;
+      if (isMultiOut)
+      {
+        output_node = dynamic_cast<const luci::CircleNode *>(o);
+        if (not output_node)
+        {
+          throw std::runtime_error{"ERROR: Invalid graph"};
+        }
+      }
+      else
+      {
+        output_node = n;
+      }
+      // the exported node must have a clone to connect the new output to
+      auto cloned_it = ctx.find(output_node);
+      if (cloned_it == ctx.end())
+      {
+        throw std::runtime_error{"ERROR: Invalid graph"};
+      }
+      // circle output
+      auto circle_output = graph->nodes()->create<luci::CircleOutput>();
+      luci::copy_common_attributes(output_node, circle_output);
+      // connect to cloned output node
+      circle_output->from(cloned_it->second);
+      // graph output
+      auto graph_output = graph->outputs()->create();
+      graph_output->name(output_node->name());
+      graph_output->dtype(output_node->dtype());
+      // graph output shape
+      auto output_shape = std::make_unique<loco::TensorShape>();
+      output_shape->rank(circle_output->rank());
+      for (uint32_t d = 0; d < output_shape->rank(); d++)
+      {
+        if (circle_output->dim(d).known())
+        {
+          output_shape->dim(d).set(circle_output->dim(d).value());
+        }
+      }
+      graph_output->shape(std::move(output_shape));
+
+      circle_output->index(graph_output->index());
+      // a single-output node needs exactly one graph output
+      if (not isMultiOut)
+        break;
+    }
+  }
+  // connect nodes
+  for (const auto &n : nodes)
+  {
+    luci::clone_connect(n, ctx);
+  }
+
+  return graph;
+}
+
+} // namespace
+
+namespace opselector
+{
+
+/**
+ * @brief Keep a pointer to the module to select operators from
+ *
+ * @throws std::runtime_error when module is null or does not hold exactly one graph
+ */
+OpSelector::OpSelector(const luci::Module *module) : _module{module}
+{
+  // reject a missing module up front; the select_by functions dereference _module
+  if (_module == nullptr)
+  {
+    throw std::runtime_error{"ERROR: Invalid module"};
+  }
+  if (_module->size() != 1)
+  {
+    throw std::runtime_error{"ERROR: Not support two or more subgraphs"};
+  }
+}
+
+/**
+ * @brief Select operators of graph 0 by id
+ *
+ * Each token is either a single id ("7") or an inclusive range ("2-5"). Ids given more
+ * than once (directly or through overlapping ranges) select their operator only once.
+ * Nodes for which luci::get_node_id throws std::runtime_error are skipped.
+ *
+ * @throws std::runtime_error on malformed tokens or ids that std::stoi cannot hold
+ */
+template <>
+std::vector<const luci::CircleNode *>
+OpSelector::select_by<SelectType::ID>(const std::vector<std::string> &comma_tokens)
+{
+  std::vector<uint32_t> by_id;
+
+  for (const auto &comma_token : comma_tokens)
+  {
+    auto dash_tokens = ::split_into_vector(comma_token, '-');
+    if (not::is_number(dash_tokens))
+    {
+      throw std::runtime_error{
+        "ERROR: To select operator by id, please use these args: [0-9], '-', ','"};
+    }
+
+    // Convert string into integer
+    std::vector<uint32_t> int_tokens;
+    try
+    {
+      std::transform(dash_tokens.begin(), dash_tokens.end(), std::back_inserter(int_tokens),
+                     [](const std::string &str) { return static_cast<uint32_t>(std::stoi(str)); });
+    }
+    catch (const std::out_of_range &)
+    {
+      // If input is big integer like '123467891234', stoi throws this exception.
+      throw std::runtime_error{"ERROR: Argument is out of range."};
+    }
+    catch (...)
+    {
+      throw std::runtime_error{"ERROR: Unknown error"};
+    }
+
+    switch (int_tokens.size())
+    {
+      case 0: // inputs like "-"
+      {
+        throw std::runtime_error{"ERROR: Nothing was entered"};
+      }
+      case 1: // inputs like "1", "2"
+      {
+        by_id.push_back(int_tokens.at(0));
+        break;
+      }
+      case 2: // inputs like "1-2", "11-50"
+      {
+        for (uint32_t i = int_tokens.at(0); i <= int_tokens.at(1); i++)
+        {
+          by_id.push_back(i);
+        }
+        break;
+      }
+      default: // inputs like "1-2-3"
+      {
+        throw std::runtime_error{"ERROR: Too many '-' in str."};
+      }
+    }
+  }
+
+  // Drop duplicates such as "1,1" or overlapping ranges like "1-3,2-4" so that each
+  // operator is selected only once; sorting also enables the binary search below
+  std::sort(by_id.begin(), by_id.end());
+  by_id.erase(std::unique(by_id.begin(), by_id.end()), by_id.end());
+
+  loco::Graph *graph = _module->graph(0);
+  std::vector<const luci::CircleNode *> selected_nodes;
+
+  for (auto node : loco::all_nodes(graph))
+  {
+    auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+
+    try
+    {
+      auto node_id = luci::get_node_id(cnode);
+      if (std::binary_search(by_id.begin(), by_id.end(), node_id))
+      {
+        selected_nodes.emplace_back(cnode);
+      }
+    }
+    catch (const std::runtime_error &)
+    {
+      // get_node_id failed for this node: it cannot be selected by id
+      continue;
+    }
+  }
+
+  return selected_nodes;
+}
+
+/**
+ * @brief Select nodes of graph 0 whose name equals one of the given tokens
+ *
+ * A name listed more than once still selects its node only once.
+ */
+template <>
+std::vector<const luci::CircleNode *>
+OpSelector::select_by<SelectType::NAME>(const std::vector<std::string> &tokens)
+{
+  loco::Graph *graph = _module->graph(0);
+  std::vector<const luci::CircleNode *> selected_nodes;
+
+  for (auto node : loco::all_nodes(graph))
+  {
+    auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+    const std::string node_name = cnode->name();
+
+    // std::find stops at the first match, so repeated names do not add the node again
+    if (std::find(tokens.begin(), tokens.end(), node_name) != tokens.end())
+      selected_nodes.emplace_back(cnode);
+  }
+
+  return selected_nodes;
+}
+
+/**
+ * @brief Build a new module holding only the operators selected by 'str'
+ *
+ * 'str' is a comma separated list interpreted according to SELECT_TYPE. For every
+ * selected multi-output operator its successor nodes are selected as well.
+ *
+ * @throws std::runtime_error when 'str' holds no token or a successor of a multi-output
+ *         operator is not a luci::CircleNode
+ */
+template <SelectType SELECT_TYPE>
+std::unique_ptr<luci::Module> OpSelector::select_by(const std::string &str)
+{
+  auto comma_tokens = ::split_into_vector(str, ',');
+  if (comma_tokens.empty())
+  {
+    throw std::runtime_error{"ERROR: Nothing was entered."};
+  }
+
+  assert(_module->size() == 1);
+
+  auto selected_nodes = select_by<SELECT_TYPE>(comma_tokens);
+
+  // multiout node should be considered
+  IsMultiOutputNode multiout_visitor;
+  std::vector<const luci::CircleNode *> output_nodes;
+  for (const auto &node : selected_nodes)
+  {
+    if (not node->accept(&multiout_visitor))
+      continue;
+
+    auto outputs = loco::succs(node);
+    for (auto &o : outputs)
+    {
+      auto output_node = dynamic_cast<const luci::CircleNode *>(o);
+      if (not output_node)
+      {
+        throw std::runtime_error{"ERROR: Invalid graph"};
+      }
+      // skip outputs that were selected explicitly or have been collected already
+      const bool known =
+        std::find(selected_nodes.begin(), selected_nodes.end(), output_node) !=
+          selected_nodes.end() ||
+        std::find(output_nodes.begin(), output_nodes.end(), output_node) != output_nodes.end();
+      if (not known)
+      {
+        output_nodes.push_back(output_node);
+      }
+    }
+  }
+  selected_nodes.insert(selected_nodes.end(), output_nodes.begin(), output_nodes.end());
+
+  auto new_module = std::make_unique<luci::Module>();
+  new_module->add(::make_graph(selected_nodes));
+
+  return new_module;
+}
+
+// Explicit instantiation of the string-based select_by() for SelectType::ID
+template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::ID>(const std::string &str);
+
+// Explicit instantiation of the string-based select_by() for SelectType::NAME
+template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::NAME>(const std::string &str);
+
+} // namespace opselector
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_OPSELECTOR_H__
+#define __CIRCLE_OPSELECTOR_OPSELECTOR_H__
+
+#include "SelectType.h"
+
+#include <luci/IR/Module.h>
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace opselector
+{
+
+/**
+ * @brief Picks operators out of a module and packs them into a new module
+ */
+class OpSelector final
+{
+public:
+  OpSelector(const luci::Module *module);
+
+public:
+  /**
+   * @brief Create a module from the operators described by str
+   * @note  Interpretation of str depends on SELECT_TYPE (see SelectType)
+   */
+  template <SelectType SELECT_TYPE> std::unique_ptr<luci::Module> select_by(const std::string &str);
+
+private:
+  /**
+   * @brief Return the nodes that match the already-split tokens
+   */
+  template <SelectType SELECT_TYPE>
+  std::vector<const luci::CircleNode *> select_by(const std::vector<std::string> &tokens);
+
+private:
+  // Module to select from; raw pointer, presumably owned by the caller - TODO confirm
+  const luci::Module *_module;
+};
+
+// Declare the ID and NAME instantiations of the string-based select_by() as extern,
+// so translation units including this header do not instantiate them implicitly
+extern template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::ID>(const std::string &str);
+extern template std::unique_ptr<luci::Module>
+OpSelector::select_by<SelectType::NAME>(const std::string &str);
+
+} // namespace opselector
+
+#endif // __CIRCLE_OPSELECTOR_OPSELECTOR_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "OpSelector.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Conv-Donv graphlet: a Conv2D feeding a DepthwiseConv2D
+ *
+ *   [Conv]
+ *     |
+ *   [Donv]
+ *
+ * Each operator owns a constant filter and a constant bias.
+ */
+class ConvDonvGraphlet
+{
+public:
+  void init(loco::Graph *g)
+  {
+    // Constant operands of Conv2D
+    _conv_filter = g->nodes()->create<luci::CircleConst>();
+    _conv_filter->name("conv_filter");
+    _conv_filter->dtype(loco::DataType::FLOAT32);
+    _conv_filter->shape({16, 1, 1, 16});
+
+    _conv_bias = g->nodes()->create<luci::CircleConst>();
+    _conv_bias->name("conv_bias");
+    _conv_bias->dtype(loco::DataType::FLOAT32);
+    _conv_bias->shape({16});
+
+    // Conv2D itself; its input is not connected here
+    _conv = g->nodes()->create<luci::CircleConv2D>();
+    _conv->name("conv");
+    _conv->filter(_conv_filter);
+    _conv->bias(_conv_bias);
+    _conv->padding(luci::Padding::SAME);
+    _conv->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _conv->dtype(loco::DataType::FLOAT32);
+    _conv->shape({1, 4, 4, 16});
+
+    // Constant operands of DepthwiseConv2D
+    _dconv_filter = g->nodes()->create<luci::CircleConst>();
+    _dconv_filter->name("dconv_filter");
+    _dconv_filter->dtype(loco::DataType::FLOAT32);
+    _dconv_filter->shape({16, 1, 1, 16});
+
+    _dconv_bias = g->nodes()->create<luci::CircleConst>();
+    _dconv_bias->name("dconv_bias");
+    _dconv_bias->dtype(loco::DataType::FLOAT32);
+    _dconv_bias->shape({16});
+
+    // DepthwiseConv2D consuming the Conv2D result
+    _dconv = g->nodes()->create<luci::CircleDepthwiseConv2D>();
+    _dconv->name("dconv");
+    _dconv->input(_conv);
+    _dconv->filter(_dconv_filter);
+    _dconv->bias(_dconv_bias);
+    _dconv->padding(luci::Padding::SAME);
+    _dconv->depthMultiplier(1);
+    _dconv->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _dconv->dtype(loco::DataType::FLOAT32);
+    _dconv->shape({1, 4, 4, 16});
+  }
+
+protected:
+  // Conv2D and its constants
+  luci::CircleConv2D *_conv{nullptr};
+  luci::CircleConst *_conv_filter{nullptr};
+  luci::CircleConst *_conv_bias{nullptr};
+  // DepthwiseConv2D and its constants
+  luci::CircleDepthwiseConv2D *_dconv{nullptr};
+  luci::CircleConst *_dconv_filter{nullptr};
+  luci::CircleConst *_dconv_bias{nullptr};
+};
+
+/**
+ * Test graph: TestIOGraph input -> Conv -> Donv -> TestIOGraph output
+ */
+class ConvDonvGraph : public luci::test::TestIOGraph, public ConvDonvGraphlet
+{
+public:
+  ConvDonvGraph()
+  {
+    // Input and output both have shape {1, 4, 4, 16}
+    luci::test::TestIOGraph::init({1, 4, 4, 16}, {1, 4, 4, 16});
+    ConvDonvGraphlet::init(g());
+
+    // Wire the graphlet between the graph input and output
+    output()->from(_dconv);
+    _conv->input(input());
+  }
+
+  // Moves the graph out of this object
+  std::unique_ptr<loco::Graph> graph() { return std::move(_g); }
+};
+
+} // namespace
+
+// Selecting a single operator by name must yield a one-graph module whose only output
+// comes from that operator, with the operator's filter and bias constants attached
+TEST(OpSelectorTest, select_by_name)
+{
+  auto module = luci::make_module();
+
+  ConvDonvGraph source_graph;
+  source_graph.transfer_to(module.get());
+
+  opselector::OpSelector selector{module.get()};
+
+  // Select conv only
+  auto conv_only = selector.select_by<opselector::SelectType::NAME>("conv");
+  ASSERT_EQ(1, conv_only->size());
+
+  auto conv_only_graph = conv_only->graph(0);
+  ASSERT_EQ(1, conv_only_graph->outputs()->size());
+
+  auto conv_out = luci::output_node(conv_only_graph, 0);
+  auto conv_node = loco::must_cast<luci::CircleConv2D *>(conv_out->from());
+  EXPECT_STREQ("conv", conv_node->name().c_str());
+  auto conv_node_filter = loco::must_cast<luci::CircleConst *>(conv_node->filter());
+  EXPECT_STREQ("conv_filter", conv_node_filter->name().c_str());
+  auto conv_node_bias = loco::must_cast<luci::CircleConst *>(conv_node->bias());
+  EXPECT_STREQ("conv_bias", conv_node_bias->name().c_str());
+
+  // Select dconv only
+  auto dconv_only = selector.select_by<opselector::SelectType::NAME>("dconv");
+  ASSERT_EQ(1, dconv_only->size());
+
+  auto dconv_only_graph = dconv_only->graph(0);
+  ASSERT_EQ(1, dconv_only_graph->outputs()->size());
+
+  auto dconv_out = luci::output_node(dconv_only_graph, 0);
+  auto dconv_node = loco::must_cast<luci::CircleDepthwiseConv2D *>(dconv_out->from());
+  EXPECT_STREQ("dconv", dconv_node->name().c_str());
+  auto dconv_node_filter = loco::must_cast<luci::CircleConst *>(dconv_node->filter());
+  EXPECT_STREQ("dconv_filter", dconv_node_filter->name().c_str());
+  auto dconv_node_bias = loco::must_cast<luci::CircleConst *>(dconv_node->bias());
+  EXPECT_STREQ("dconv_bias", dconv_node_bias->name().c_str());
+}
+
+// A selection string made only of the separator must be rejected with an exception
+TEST(OpSelectorTest, select_by_name_NEG)
+{
+  auto module = luci::make_module();
+
+  ConvDonvGraph source_graph;
+  source_graph.transfer_to(module.get());
+
+  opselector::OpSelector selector{module.get()};
+
+  EXPECT_ANY_THROW(selector.select_by<opselector::SelectType::NAME>(","));
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_SELECT_TYPE_H__
+#define __CIRCLE_OPSELECTOR_SELECT_TYPE_H__
+
+#include <string>
+
+namespace opselector
+{
+
+/**
+ * @brief Criterion used by OpSelector::select_by() to match operators
+ */
+enum class SelectType
+{
+  ID,   // match nodes by node id
+  NAME, // match nodes by node name
+};
+
+} // namespace opselector
+
+#endif // __CIRCLE_OPSELECTOR_SELECT_TYPE_H__
add_custom_command(OUTPUT ${TFLITE_DST_PATH}
COMMAND ${CMAKE_COMMAND} -E copy "${TFLITE_SRC_PATH}" "${TFLITE_DST_PATH}"
- DEPENDS ${TFLITE_SRC_PATH} ${PARTITIONER_OUTPUT_PATH}
+ DEPENDS ${TFLITE_SRC_PATH}
COMMENT "Copy ${RECIPE_NAME}.tflite"
)
list(APPEND TEST_DEPS ${TFLITE_DST_PATH})
add_custom_command(OUTPUT ${CIRCLE_DST_PATH}
COMMAND ${CMAKE_COMMAND} -E copy "${CIRCLE_SRC_PATH}" "${CIRCLE_DST_PATH}"
- DEPENDS ${CIRCLE_SRC_PATH} ${PARTITIONER_OUTPUT_PATH}
+ DEPENDS ${CIRCLE_SRC_PATH}
COMMENT "Copy ${RECIPE_NAME}.circle"
)
list(APPEND TEST_DEPS ${CIRCLE_DST_PATH})
add_custom_command(OUTPUT ${PART_DST_PATH}
COMMAND ${CMAKE_COMMAND} -E copy "${PART_SRC_PATH}" "${PART_DST_PATH}"
- DEPENDS ${PART_SRC_PATH} ${PARTITIONER_OUTPUT_PATH}
+ DEPENDS ${PART_SRC_PATH}
COMMENT "Copy ${PART_FILE}"
)
list(APPEND TEST_DEPS ${PART_DST_PATH})
"$<TARGET_FILE:circle_part_driver>"
${PARTITION_LIST}
)
+
+# When ONE_UBUNTU_CODENAME_JAMMY is set, register a second run of part_eval_all.sh
+# that uses the venv_2_10_1 virtual environment
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_test(NAME circle_part_value_210_test
+    COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/part_eval_all.sh"
+            "${CMAKE_CURRENT_BINARY_DIR}"
+            "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+            "$<TARGET_FILE:circle_part_driver>"
+            ${PARTITION_LIST}
+  )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
list(APPEND TEST_NAMES ${RECIPE})
endmacro(AddFakeQuant)
+# Macro to quantize without quantize_dequantize_weights
+#
+# AddSkipQDQW(RECIPE DTYPE <dtype> GRANULARITY <granularity>
+#             [USE_QCONFIG] [INPUT_DTYPE <dtype>] [OUTPUT_DTYPE <dtype>])
+# Runs record-minmax and then circle-quantizer (--quantize_with_minmax) on the recipe's
+# fp32 circle and registers the quantized circle as a test dependency.
+macro(AddSkipQDQW RECIPE)
+  cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+  # Input model and generated files
+  set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
+  set(RECORDED_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.recorded.circle")
+  set(QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle")
+
+  # Optional quantization configuration file
+  if(ARG_USE_QCONFIG)
+    set(QCONFIG_OPT "--config" "${ARTIFACTS_BIN_PATH}/${RECIPE}.qconf.json")
+  else()
+    set(QCONFIG_OPT "")
+  endif()
+
+  # Optional dtype of the model input
+  if(ARG_INPUT_DTYPE)
+    set(INPUT_DTYPE_OPT "--input_type" "${ARG_INPUT_DTYPE}")
+  else()
+    set(INPUT_DTYPE_OPT "")
+  endif()
+
+  # Optional dtype of the model output
+  if(ARG_OUTPUT_DTYPE)
+    set(OUTPUT_DTYPE_OPT "--output_type" "${ARG_OUTPUT_DTYPE}")
+  else()
+    set(OUTPUT_DTYPE_OPT "")
+  endif()
+
+  # Generate quantized .circle
+  add_custom_command(OUTPUT ${QUANT_CIRCLE_PATH}
+    COMMAND $<TARGET_FILE:record-minmax> --input_model ${CIRCLE_PATH} --output_model ${RECORDED_CIRCLE_PATH}
+    COMMAND $<TARGET_FILE:circle-quantizer>
+      --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY}
+      ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH}
+      ${INPUT_DTYPE_OPT} ${OUTPUT_DTYPE_OPT}
+    DEPENDS
+      circle-quantizer
+      record-minmax
+      ${CIRCLE_PATH}
+    COMMENT "Generate ${RECIPE}.q.circle"
+  )
+
+  list(APPEND TEST_DEPS ${QUANT_CIRCLE_PATH})
+  list(APPEND TEST_NAMES ${RECIPE})
+endmacro(AddSkipQDQW)
+
# Read "test.lst"
include("test.lst")
2. Make a recipe (`test.recipe`) for fp32 model under the directory.
-3. Make a rule (`test.rule`) you want to test under the directory. (For more information on dredd-test-rules, see _dredd-rule-lib_ module.)
+3. Make a rule (`test.rule`) you want to test under the directory.
+(For more information on dredd-test-rules, see _dredd-rule-lib_ module.)
4. Add test to `test.lst` in this module with `Add` macro.
-
-```
-Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG)
-```
-
-- `RECIPE_DIR`: Path to the directory where the recipe file is saved.
-- `DTYPE`: Default quantization dtype (uint8, int16)
-- `GRANULARITY`: Quantization granularity (channel, layer)
-- `USE_QCONFIG`: (Optional) Whether to use a quantization configuration file or not. If this is set, `test.qconf.json` should exist under `RECIPE_DIR`
+ ```
+ Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG)
+ ```
+ - `RECIPE_DIR`: Path to the directory where the recipe file is saved.
+ - `DTYPE`: Default quantization dtype (uint8, int16)
+ - `GRANULARITY`: Quantization granularity (channel, layer)
+ - `USE_QCONFIG`: (Optional) Whether to use a quantization configuration file or not.
+ If this is set, `test.qconf.json` should exist under `RECIPE_DIR`
## Example
Add(Quant_Conv_002 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32 OUTPUT_DTYPE float32)
AddFakeQuant(Quant_Add_000)
+AddFakeQuant(Quant_DepthToSpace_000)
+AddFakeQuant(Quant_SpaceToDepth_000)
## CIRCLE RECIPE
# MPQ Test (default: s16, target: u8)
Add(Quant_InstanceNorm_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+
+# Test for quantization without QuantizeDequantizeWeights
+AddSkipQDQW(Quant_Conv_005 DTYPE uint8 GRANULARITY channel)
+AddSkipQDQW(Quant_Conv_006 DTYPE int16 GRANULARITY channel)
int entry(int argc, char **argv)
{
- // Simple argument parser (based on map)
- std::map<std::string, OptionHook> argparse;
luci::CircleQuantizer quantizer;
auto options = quantizer.options();
"destination_tensor_name(string)");
arser.add_argument("--input_type")
- .help("Input type of quantized model (uint8, int16, or float32)");
+ .help("Input type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
+ "multiple inputs, "
+ "use comma-separated values. e.g., uint8,int16");
arser.add_argument("--output_type")
- .help("Output type of quantized model (uint8, int16, or float32)");
+ .help("Output type of quantized model (uint8, int16, int32, int64, float32, or bool). For "
+ "multiple outputs, "
+ "use comma-separated values. e.g., uint8,int16");
arser.add_argument(cfg).help("Path to the quantization configuration file");
It tests the non-functional conditions of the optimized circle binary resulting from circle2circle.
-This test basically refers to the _TensorFlowLiteRecipes_ resource. So you should add what you want to test to both of the resource and `test.lst`.
+This test basically refers to the _TensorFlowLiteRecipes_ resource.
+So you should add what you want to test to both of the resource and `test.lst`.
## Example
Add(Net_Conv_QuantDequant_000 PASS remove_quantdequant)
Add(Net_Conv_Min_Max_000 PASS transform_min_max_to_relu6)
Add(Net_Conv_Min_Relu_000 PASS transform_min_relu_to_relu6)
+Add(Net_Conv_PReluGraph_000 PASS fuse_prelu)
Add(Net_Conv_Relu6_000 PASS fuse_activation_function)
+Add(Net_Duplicate_Weights_000 PASS remove_duplicate_const)
Add(Net_DwConv_BN_000 PASS fuse_batchnorm_with_dwconv)
Add(Net_DwConv_BN_001 PASS fuse_batchnorm_with_dwconv)
+Add(Net_FullyConnected_Add_000 PASS fold_fully_connected)
Add(Net_Reshape_Reshape_000 PASS remove_redundant_reshape)
Add(Net_Squeeze_Squeeze_000 PASS substitute_squeeze_to_reshape)
Add(Net_TConv_Add_000 PASS fuse_add_with_tconv)
Add(Net_TConv_BN_002 PASS fuse_batchnorm_with_tconv)
Add(Net_TConv_BN_003 PASS fuse_batchnorm_with_tconv)
Add(Net_TConv_BN_004 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_005 PASS fuse_batchnorm_with_tconv)
Add(Net_InstanceNorm_001 PASS fuse_instnorm)
Add(Net_InstanceNorm_003 PASS fuse_instnorm)
Add(Net_InstanceNorm_004 PASS fuse_instnorm)
Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax)
Add(MaxPoolWithArgmax_002 PASS resolve_customop_max_pool_with_argmax)
Add(FullyConnected_007 PASS replace_non_const_fc_with_batch_matmul)
+Add(FullyConnected_008 PASS replace_non_const_fc_with_batch_matmul)
## CIRCLE RECIPE
echo "-- Found circle-inspect: ${CIRCLE_INSPECT_PATH}"
echo "-- Found circle-verify: ${CIRCLE_VERIFY_PATH}"
-echo "-- Found circle2circle: ${CIRCLE2CIRCLE_PATH}"
echo "-- Found common-artifacts: ${RESOURCE_DIR}"
TESTED=()
add_switch(arser, "--fold_dequantize", "This will fold dequantize op");
add_switch(arser, "--fold_dwconv",
"This will fold Depthwise Convolution operator with constant inputs");
+ add_switch(arser, "--fold_fully_connected",
+ "This will fold FullyConnected operator with constant inputs");
add_switch(arser, "--fold_gather", "This will fold Gather operator");
add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator");
add_switch(arser, "--forward_reshape_to_unaryop",
"This will move Reshape after UnaryOp for centain condition");
+ add_switch(arser, "--forward_transpose_op",
+ "This will move Transpose Op forward if possible (for further optimization)");
add_switch(arser, "--fuse_activation_function",
"This will fuse Activation function to a preceding operator");
add_switch(arser, "--fuse_add_with_fully_connected",
"when the impact is known to be acceptable.");
add_switch(arser, "--fuse_preactivation_batchnorm",
"This will fuse BatchNorm operators of pre-activations to Convolution operator");
+ add_switch(arser, "--fuse_prelu", "This will fuse operators to PReLU operator");
+ add_switch(arser, "--remove_duplicate_const", "This will remove all duplicate constant nodes");
add_switch(arser, "--remove_fakequant", "This will remove FakeQuant operators");
add_switch(arser, "--remove_quantdequant", "This will remove Quantize-Dequantize sequence");
add_switch(arser, "--remove_redundant_quantize", "This will remove redundant Quantize operators");
add_switch(arser, "--substitute_transpose_to_reshape",
"This will convert single input Transpose to Reshape");
add_switch(arser, "--expand_broadcast_const", "This will expand broadcastable constant inputs");
+ add_switch(arser, "--unroll_unidirseqlstm", "Unroll UnidirectionalSequenceLSTM operator.");
add_switch(arser, "--convert_nchw_to_nhwc",
"Experimental: This will convert NCHW operators to NHWC under the assumption that "
"input model is NCHW.");
options->enable(Algorithms::FoldDequantize);
if (arser.get<bool>("--fold_dwconv"))
options->enable(Algorithms::FoldDepthwiseConv2D);
+ if (arser.get<bool>("--fold_fully_connected"))
+ options->enable(Algorithms::FoldFullyConnected);
if (arser.get<bool>("--fold_gather"))
options->enable(Algorithms::FoldGather);
if (arser.get<bool>("--fold_sparse_to_dense"))
options->enable(Algorithms::FoldSparseToDense);
if (arser.get<bool>("--forward_reshape_to_unaryop"))
options->enable(Algorithms::ForwardReshapeToUnaryOp);
+ if (arser.get<bool>("--forward_transpose_op"))
+ options->enable(Algorithms::ForwardTransposeOp);
if (arser.get<bool>("--fuse_activation_function"))
options->enable(Algorithms::FuseActivationFunction);
if (arser.get<bool>("--fuse_batchnorm_with_conv"))
options->enable(Algorithms::MakeBatchNormGammaPositive);
if (arser.get<bool>("--fuse_preactivation_batchnorm"))
options->enable(Algorithms::FusePreActivationBatchNorm);
+ if (arser.get<bool>("--fuse_prelu"))
+ options->enable(Algorithms::FusePRelu);
if (arser.get<bool>("--fuse_transpose_with_mean"))
options->enable(Algorithms::FuseTransposeWithMean);
+ if (arser.get<bool>("--remove_duplicate_const"))
+ options->enable(Algorithms::RemoveDuplicateConst);
if (arser.get<bool>("--remove_fakequant"))
options->enable(Algorithms::RemoveFakeQuant);
if (arser.get<bool>("--remove_quantdequant"))
options->enable(Algorithms::TransformMinReluToRelu6Pass);
if (arser.get<bool>("--expand_broadcast_const"))
options->enable(Algorithms::ExpandBroadcastConst);
+ if (arser.get<bool>("--unroll_unidirseqlstm"))
+ options->enable(Algorithms::UnrollUnidirSeqLSTM);
if (arser.get<bool>("--mute_warnings"))
settings->set(luci::UserSettings::Key::MuteWarnings, true);
#[[ Generate common python virtual enviornment ]]
+# NOTE find_package tries to use at least python3.8, as follows, depending on the platform version
+#   Ubuntu18.04; explicitly installed python3.8 (default is python3.6)
+#   Ubuntu20.04; default python3.8
+#   Ubuntu22.04; default python3.10
+# refer to https://github.com/Samsung/ONE/issues/9962
find_package(PythonInterp 3.8 QUIET)
find_package(PythonLibs 3.8 QUIET)
# Create python virtual environment with tensorflow 2.8.0
set(VIRTUALENV_OVERLAY_TF_2_8_0 "${NNCC_OVERLAY_DIR}/venv_2_8_0")
+# TensorFlow 2.10.1 for Ubuntu22.04
+# The extra virtual environment lives in venv_2_10_1 under NNCC_OVERLAY_DIR and is
+# only defined when ONE_UBUNTU_CODENAME_JAMMY is set
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  set(VIRTUALENV_OVERLAY_TF_2_10_1 "${NNCC_OVERLAY_DIR}/venv_2_10_1")
+endif(ONE_UBUNTU_CODENAME_JAMMY)
add_custom_command(
OUTPUT ${VIRTUALENV_OVERLAY_TF_2_8_0}
set(REQUIREMENTS_FILE "requirements.txt")
set(REQUIREMENTS_OVERLAY_PATH_TF_2_8_0 "${VIRTUALENV_OVERLAY_TF_2_8_0}/${REQUIREMENTS_FILE}")
+# When ONE_UBUNTU_CODENAME_JAMMY is set: create the venv_2_10_1 virtual environment
+# with `python -m venv` and define where its requirements file goes
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_custom_command(
+    OUTPUT ${VIRTUALENV_OVERLAY_TF_2_10_1}
+    COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_10_1}
+  )
+  # The requirements file is placed inside the virtual environment directory
+  set(REQUIREMENTS_FILE "requirements.txt")
+  set(REQUIREMENTS_OVERLAY_PATH_TF_2_10_1 "${VIRTUALENV_OVERLAY_TF_2_10_1}/${REQUIREMENTS_FILE}")
+endif(ONE_UBUNTU_CODENAME_JAMMY)
+
set(PYTHON_OVERLAY python3)
if(PYTHON_EXECUTABLE MATCHES python3.8)
set(PYTHON_OVERLAY python3.8)
COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.8.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
COMMAND ${CMAKE_COMMAND} -E echo "protobuf==3.20.1" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+ COMMAND ${CMAKE_COMMAND} -E echo "pydot==1.4.2" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
${PIP_OPTION_TRUSTED_HOST} install --upgrade pip setuptools
COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
)
+# When ONE_UBUNTU_CODENAME_JAMMY is set: write the requirements file for venv_2_10_1
+# and install the pinned packages into that virtual environment
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_custom_command(
+    OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    # Start from an empty file: the echo commands below append with '>>'
+    COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.10.1" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==23.1.21" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    COMMAND ${CMAKE_COMMAND} -E echo "protobuf==3.19.6" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    COMMAND ${CMAKE_COMMAND} -E echo "pydot==1.4.2" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+    # Upgrade pip/setuptools first, then install the pinned requirements
+    COMMAND ${VIRTUALENV_OVERLAY_TF_2_10_1}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+            ${PIP_OPTION_TRUSTED_HOST} install --upgrade pip setuptools
+    COMMAND ${VIRTUALENV_OVERLAY_TF_2_10_1}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+            ${PIP_OPTION_TRUSTED_HOST} install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1} --upgrade
+    DEPENDS ${VIRTUALENV_OVERLAY_TF_2_10_1}
+  )
+
+  # Part of the default build (ALL): creates the environment and installs the packages
+  add_custom_target(common_artifacts_python_u22_deps ALL
+    DEPENDS ${VIRTUALENV_OVERLAY_TF_2_10_1}
+            ${REQUIREMENTS_OVERLAY_PATH_TF_2_10_1}
+  )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
+
#[[ Generate common resources ]]
# TODO add pbtxt
nnas_find_package(HDF5 QUIET)
set(TEST_RULE_FILENAME "test.rule")
set(TEST_QCONFIG_FILENAME "test.qconf.json")
-set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh")
+set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py")
# Get test case list
unset(RECIPES)
file(GLOB TFLITE_SUBDIR RELATIVE ${TFLITE_RECIPE_REPO} ${TFLITE_RECIPE_REPO}/*)
list(APPEND TEST_DEPS ${NNPKG_DIR})
add_custom_command(OUTPUT ${NNPKG_MODEL}
- COMMAND ${MODEL2NNPKG} ${MODEL_PATH}
- DEPENDS ${MODEL2NNPKG} ${MODEL_PATH} ${NNPKG_DIR}
+ COMMAND ${PYTHON_EXECUTABLE} ${MODEL2NNPKG} -m ${MODEL_PATH}
+ DEPENDS ${MODEL2NNPKG} ${MODEL_PATH}
COMMENT "Generate ${RECIPE} nnpackage"
)
list(APPEND TEST_DEPS ${NNPKG_MODEL})
set(TC_DIRECTORY "${NNPKG_DIR}/metadata/tc")
add_custom_command(OUTPUT ${TC_DIRECTORY}
COMMAND ${CMAKE_COMMAND} -E make_directory ${TC_DIRECTORY}
- DEPENDS ${NNPKG_DIR}
COMMENT "Generate ${RECIPE} nnpackage test directory"
)
list(APPEND TEST_DEPS ${TC_DIRECTORY})
#[[ optimize : Exclude from circle optimization(circle2circle) ]]
## TensorFlowLiteRecipes
-optimize(UnidirectionalSequenceLSTM_001) # This recipe contains is_variable Tensor
## CircleRecipes
tcgenerate(Tile_U8_000)
tcgenerate(TopKV2_000)
tcgenerate(TopKV2_001)
-tcgenerate(UnidirectionalSequenceLSTM_000) # runtime and luci-interpreter doesn't support UnidirectionalSequenceLSTM op yet
-tcgenerate(UnidirectionalSequenceLSTM_001) # runtime and luci-interpreter doesn't support UnidirectionalSequenceLSTM op yet
+tcgenerate(UnidirectionalSequenceLSTM_000) # This mode is just for Op creation, cannot run
tcgenerate(Unique_000)
tcgenerate(Unique_001)
tcgenerate(Unique_002)
--- /dev/null
+test.local.lst
--- /dev/null
+# Nothing in this file is configured unless ENABLE_TEST is on
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+# Test names collected by singleOpTest(); passed to TestSingleOp.sh below
+unset(DALGONA_SINGLE_OP_TEST)
+
+# singleOpTest(NAME): append NAME to DALGONA_SINGLE_OP_TEST
+macro(singleOpTest NAME)
+  list(APPEND DALGONA_SINGLE_OP_TEST ${NAME})
+endmacro(singleOpTest)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+unset(TEST_DEPS)
+
+# Binary directory of the testDataGenerator target; handed to the test script
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+# Place test scripts in one place
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/GenH5RandomInputs.py" "${CMAKE_CURRENT_BINARY_DIR}/GenH5RandomInputs.py" COPYONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/SingleOperatorTest.py" "${CMAKE_CURRENT_BINARY_DIR}/SingleOperatorTest.py" COPYONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/RandomDataGenerator.py" "${CMAKE_CURRENT_BINARY_DIR}/RandomDataGenerator.py" COPYONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/TestUtil.py" "${CMAKE_CURRENT_BINARY_DIR}/TestUtil.py" COPYONLY)
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+# test.config records the path of the dalgona executable as DALGONA_PATH
+add_custom_command(
+  OUTPUT ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'DALGONA_PATH=\"$<TARGET_FILE:dalgona>\"' >> ${TEST_CONFIG}
+  DEPENDS dalgona
+  COMMENT "Generate test configuration"
+)
+
+# Import pics module
+# (symlinks ${PICS_BIN_PATH}/circle into this build directory, next to the copied scripts)
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+                   COMMAND ${CMAKE_COMMAND} -E create_symlink
+                   ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}" "${CMAKE_CURRENT_BINARY_DIR}/circle")
+
+# This enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(dalgona_test_deps ALL DEPENDS ${TEST_DEPS})
+
+# Run tests
+# Arguments: test.config, artifacts directory, virtual environment, then the test names
+add_test(
+  NAME dalgona_single_op_test
+  COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/TestSingleOp.sh"
+          "${TEST_CONFIG}"
+          "${ARTIFACTS_BIN_PATH}"
+          "${NNCC_OVERLAY_DIR}/venv_2_8_0"
+          ${DALGONA_SINGLE_OP_TEST}
+)
+
+# Same tests with the venv_2_10_1 virtual environment when ONE_UBUNTU_CODENAME_JAMMY is set
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_test(
+    NAME dalgona_single_op_210_test
+    COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/TestSingleOp.sh"
+            "${TEST_CONFIG}"
+            "${ARTIFACTS_BIN_PATH}"
+            "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+            ${DALGONA_SINGLE_OP_TEST}
+  )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import h5py as h5
+import numpy as np
+import argparse
+
+from circle.Model import Model
+from RandomDataGenerator import RandomDataGenerator
+
+#
+# This script generates a pack of random input data (.h5) expected by the input circle model
+#
+# Basic usage:
+#   GenH5RandomInputs.py --model <path/to/circle/model> --num_data <number/of/data> --output <path/to/output/data>
+#   ex: GenH5RandomInputs.py --model add.circle --num_data 3 --output add.circle.input.h5
+#   (This will create add.circle.input.h5, composed of three random inputs, at the given output path)
+parser = argparse.ArgumentParser()
+parser.add_argument('--model', type=str, required=True)
+parser.add_argument('--num_data', type=int, required=True)
+parser.add_argument('--output', type=str, required=True)
+args = parser.parse_args()
+
+model = args.model
+num_data = args.num_data
+output_path = args.output
+
+with open(model, 'rb') as f:
+ buf = f.read()
+ circle_model = Model.GetRootAsModel(buf, 0)
+
+# Assume one subgraph
+assert (circle_model.SubgraphsLength() == 1)
+graph = circle_model.Subgraphs(0)
+inputs = graph.InputsAsNumpy()
+
+# Create h5 file
+h5_file = h5.File(output_path, 'w')
+group = h5_file.create_group("value")
+group.attrs['desc'] = "Input data for " + model
+
+# Generate random data
+for i in range(num_data):
+ sample = group.create_group(str(i))
+ for j in range(len(inputs)):
+ input_index = inputs[j]
+ tensor = graph.Tensors(input_index)
+ g = RandomDataGenerator(tensor.ShapeAsNumpy())
+ input_data = g.gen(tensor.Type())
+ sample.create_dataset(str(j), data=input_data)
+
+h5_file.close()
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import numpy as np
+from circle.TensorType import TensorType
+
+
+class RandomDataGenerator:
+ def __init__(self, shape):
+ self.shape = shape
+
+ def _unsupported_types(self):
+ raise RuntimeError('Unsupported data type')
+
+ def _gen_uint8(self):
+ return np.random.randint(0, high=256, size=self.shape, dtype=np.uint8)
+
+ def _gen_int16(self):
+ return np.random.randint(-32767, high=32768, size=self.shape, dtype=np.int16)
+
+ def _gen_float32(self):
+ return np.array(10 * np.random.random_sample(self.shape) - 5, np.float32)
+
+ def gen(self, dtype):
+ gen_book = dict()
+ gen_book[TensorType.UINT8] = self._gen_uint8
+ gen_book[TensorType.INT16] = self._gen_int16
+ gen_book[TensorType.FLOAT32] = self._gen_float32
+
+ return gen_book.get(dtype, self._unsupported_types)()
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Test for a model with a single operator"""
+
+from TestUtil import *
+
+from circle import Model
+from circle import BuiltinOptions
+from circle import BuiltinOperator
+from circle import Conv2DOptions
+from circle import DepthwiseConv2DOptions
+from circle import AddOptions
+from circle import FullyConnectedOptions
+from circle import TransposeConvOptions
+from circle import InstanceNormOptions
+from circle import SplitOptions
+
+
+class SingleOperatorTest(object):
+ def StartAnalysis(self, args):
+ """Called when the analysis starts"""
+ with open(args, 'rb') as f:
+ buffer = f.read()
+ self._model = Model.Model.GetRootAsModel(buffer, 0)
+
+ # Check model has one subgraph
+ assertTrue(self._model.SubgraphsLength() == 1, "Model has more than one subgraph")
+ graph = self._model.Subgraphs(0)
+
+ # Check model has one operator
+ assertTrue(graph.OperatorsLength() == 1, "Model has more than one operator")
+ self._op = graph.Operators(0)
+
+    def DefaultOpPost(self, name, opcode, inputs, output):
+        """Abort with SystemExit, reporting `opcode` as a not-yet-implemented operator.
+
+        NOTE(review): presumably the fallback hook for operators without a dedicated
+        Post method - confirm against the caller.
+        """
+        raise SystemExit('NYI operator: ' + str(opcode))
+
+ def testConv2D(self, padding, stride, dilation, fused_act):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.CONV_2D)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.Conv2DOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = Conv2DOptions.Conv2DOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ checkPadding(padding, opt.Padding())
+ assertTrue(opt.StrideW() == stride['w'], "Stride_w mismatches")
+ assertTrue(opt.StrideH() == stride['h'], "Stride_h mismatches")
+ assertTrue(opt.DilationWFactor() == dilation['w'], "Dilation_w mismatches")
+ assertTrue(opt.DilationHFactor() == dilation['h'], "Dilation_w mismatches")
+ checkActivation(fused_act, opt.FusedActivationFunction())
+
+ def Conv2DPre(self, name, input, filter, bias, padding, stride, dilation, fused_act):
+ self.testConv2D(padding, stride, dilation, fused_act)
+
+ def Conv2DPost(self, name, input, filter, bias, padding, stride, dilation, output,
+ fused_act):
+ self.testConv2D(padding, stride, dilation, fused_act)
+
+ def testAdd(self, fused_act):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.ADD)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.AddOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = AddOptions.AddOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ checkActivation(fused_act, opt.FusedActivationFunction())
+
+ def AddPre(self, name, x, y, fused_act):
+ self.testAdd(fused_act)
+
+ def AddPost(self, name, x, y, output, fused_act):
+ self.testAdd(fused_act)
+
+ def testDepthwiseConv2D(self, padding, stride, depth_multiplier, dilation, fused_act):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(),
+ BuiltinOperator.BuiltinOperator.DEPTHWISE_CONV_2D)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.DepthwiseConv2DOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = DepthwiseConv2DOptions.DepthwiseConv2DOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ checkPadding(padding, opt.Padding())
+ assertTrue(opt.StrideW() == stride['w'], "Stride_w mismatches")
+ assertTrue(opt.StrideH() == stride['h'], "Stride_h mismatches")
+ assertTrue(opt.DepthMultiplier() == depth_multiplier,
+ "Depth multiplier mismatches")
+ assertTrue(opt.DilationWFactor() == dilation['w'], "Dilation_w mismatches")
+ assertTrue(opt.DilationHFactor() == dilation['h'], "Dilation_w mismatches")
+ checkActivation(fused_act, opt.FusedActivationFunction())
+
+ def DepthwiseConv2DPre(self, name, input, filter, bias, padding, stride,
+ depth_multiplier, dilation, fused_act):
+ self.testDepthwiseConv2D(padding, stride, depth_multiplier, dilation, fused_act)
+
+ def DepthwiseConv2DPost(self, name, input, filter, bias, padding, stride,
+ depth_multiplier, dilation, output, fused_act):
+ self.testDepthwiseConv2D(padding, stride, depth_multiplier, dilation, fused_act)
+
+ def testFullyConnected(self, fused_act):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.FULLY_CONNECTED)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.FullyConnectedOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = FullyConnectedOptions.FullyConnectedOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ checkActivation(fused_act, opt.FusedActivationFunction())
+
+ def FullyConnectedPre(self, name, input, weights, bias, fused_act):
+ self.testFullyConnected(fused_act)
+
+ def FullyConnectedPost(self, name, input, weights, bias, output, fused_act):
+ self.testFullyConnected(fused_act)
+
+ def testTransposeConv(self, padding, stride):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.TRANSPOSE_CONV)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.TransposeConvOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = TransposeConvOptions.TransposeConvOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ checkPadding(padding, opt.Padding())
+ assertTrue(opt.StrideW() == stride['w'], "Stride_w mismatches")
+ assertTrue(opt.StrideH() == stride['h'], "Stride_h mismatches")
+
+ def TransposeConvPre(self, name, input, filter, output_shape, bias, padding, stride):
+ self.testTransposeConv(padding, stride)
+
+ def TransposeConvPost(self, name, input, filter, output_shape, bias, padding, stride,
+ output):
+ self.testTransposeConv(padding, stride)
+
+ def testInstanceNorm(self, epsilon, fused_act):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.INSTANCE_NORM)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.InstanceNormOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = InstanceNormOptions.InstanceNormOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ assertTrue(opt.Epsilon() == epsilon, "epsilon mismatches")
+ checkActivation(fused_act, opt.FusedActivationFunction())
+
+ def InstanceNormPre(self, name, input, gamma, beta, epsilon, fused_act):
+ self.testInstanceNorm(epsilon, fused_act)
+
+ def InstanceNormPost(self, name, input, gamma, beta, epsilon, output, fused_act):
+ self.testInstanceNorm(epsilon, fused_act)
+
+ def testSplit(self, num_split):
+ # Check opcode
+ opcode = self._model.OperatorCodes(self._op.OpcodeIndex())
+ checkOpcode(opcode.BuiltinCode(), BuiltinOperator.BuiltinOperator.SPLIT)
+
+ # Check option
+ checkBuiltinOptionType(self._op.BuiltinOptionsType(),
+ BuiltinOptions.BuiltinOptions.SplitOptions)
+
+ self._opt = self._op.BuiltinOptions()
+ opt = SplitOptions.SplitOptions()
+ opt.Init(self._opt.Bytes, self._opt.Pos)
+ assertTrue(opt.NumSplits() == num_split, "num_split mismatches")
+
+ def SplitPre(self, name, split_dim, input, num_split):
+ self.testSplit(num_split)
+
+ def SplitPost(self, name, split_dim, input, num_split, outputs):
+ self.testSplit(num_split)
+ assertTrue(num_split == len(outputs), "num_split mismatches with outputs")
--- /dev/null
+#!/bin/bash
+
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# This script tests the basic behavior of dalgona
+#
+# HOW TO USE
+#
+# ./test_single_op.sh <path/to/test.config> <path/to/work_dir> <path/to/venv> <TEST 1> <TEST 2> ...
+# test.config : set ${DALGONA_PATH}
+# work_dir : archive of common-artifacts (ex: build/compiler/common-artifacts)
+# venv : virtual environment for python execution
+
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "$CONFIG_PATH")
+GEN_SCRIPT_PATH="${BIN_PATH}/GenH5RandomInputs.py"
+TEST_SCRIPT_PATH="${BIN_PATH}/SingleOperatorTest.py"
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+
+# test.config is expected to define DALGONA_PATH
+source "${CONFIG_PATH}"
+
+echo "-- Found DALGONA: ${DALGONA_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+# Without the work directory no test can run, so stop with the failure exit code
+pushd "${WORKDIR}" || exit 255
+for TESTCASE in "$@"; do
+  TESTED+=("${TESTCASE}")
+
+  TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+
+  # A test counts as passed only if this tag file exists after the run
+  PASSED_TAG="${BIN_PATH}/${TESTCASE}.passed"
+  rm -f "${PASSED_TAG}"
+
+  # The commands below run in a subshell; stdout and stderr both go to the log
+  cat > "${BIN_PATH}/${TESTCASE}.log" <(
+    exec 2>&1
+    set -ex
+
+    # Generate random h5 input data
+    source "${VIRTUALENV}/bin/activate"
+    # Tested inside `if` so that `set -e` does not abort before the message is
+    # printed. `exit` (not `continue`) is used because this is a subshell: it
+    # cannot advance the enclosing loop, it can only end itself.
+    if ! "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+      --model "${TESTCASE_FILE}.circle" \
+      --num_data 3 \
+      --output "${BIN_PATH}/${TESTCASE}.circle.input.h5"; then
+      echo "FAILED TO GENERATE INPUT"
+      exit 1
+    fi
+
+    # Run dalgona with test script(SingleOperatorTest.py)
+    if "${DALGONA_PATH}" \
+      --input_model "${TESTCASE_FILE}.circle" \
+      --input_data "${BIN_PATH}/${TESTCASE}.circle.input.h5" \
+      --analysis "${TEST_SCRIPT_PATH}" \
+      --analysis_args "${TESTCASE_FILE}.circle"; then
+      touch "${PASSED_TAG}"
+    fi
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("$TESTCASE")
+  else
+    FAILED+=("$TESTCASE")
+  fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
--- /dev/null
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from circle import ActivationFunctionType
+from circle import BuiltinOptions
+from circle import Padding
+
+
+def assertTrue(cond, msg):
+ assert cond, msg
+
+
+def checkPadding(pad, exp_pad):
+ if pad == 'SAME':
+ assertTrue(exp_pad == Padding.Padding.SAME, "Padding mismatches")
+ elif pad == 'VALID':
+ assertTrue(exp_pad == Padding.Padding.VALID, "Padding mismatches")
+ else:
+ raise SystemExit('Unsupported padding')
+
+
+def checkActivation(act, exp_act):
+ act_functions = {
+ 'relu': ActivationFunctionType.ActivationFunctionType.RELU,
+ 'relu6': ActivationFunctionType.ActivationFunctionType.RELU6,
+ 'relu_n1_to_1': ActivationFunctionType.ActivationFunctionType.RELU_N1_TO_1,
+ 'tanh': ActivationFunctionType.ActivationFunctionType.TANH,
+ 'none': ActivationFunctionType.ActivationFunctionType.NONE,
+ 'sign_bit': ActivationFunctionType.ActivationFunctionType.SIGN_BIT,
+ }
+
+ try:
+ assertTrue(act_functions[act] == exp_act, "Activation function mismatches")
+ except KeyError:
+ raise SystemExit('Unsupported activation functions')
+
+
+def checkOpcode(opcode, exp_opcode):
+ assertTrue(opcode == exp_opcode,
+ "Opcode mismatches (" + str(opcode) + ", " + str(exp_opcode) + ")")
+
+
+def checkBuiltinOptionType(option, exp_option):
+ assertTrue(
+ option == exp_option,
+ "Built-in option type mismatches (" + str(option) + ", " + str(exp_option) + ")")
--- /dev/null
+# Components this directory declares via require().
+# NOTE(review): presumably consumed by the build system to order/enable modules - confirm.
+require("dalgona")
+require("common-artifacts")
+require("pics")
--- /dev/null
+# Test cases registered through singleOpTest(), one per line.
+# NOTE(review): names presumably match <name>.circle models in common-artifacts - confirm.
+singleOpTest(Conv2D_000)
+singleOpTest(Conv2D_001)
+singleOpTest(Conv2D_002)
+singleOpTest(Conv2D_003)
+singleOpTest(Split_000)
+singleOpTest(InstanceNorm_000)
--- /dev/null
+# NOTE find_package will try to use at least python3.8 as follows depending on platform version
+#   Ubuntu18.04; explicitly installed python3.8 (default is python3.6)
+#   Ubuntu20.04; default python3.8
+#   Ubuntu22.04; default python3.10
+#   refer https://github.com/Samsung/ONE/issues/9962
+find_package(PythonInterp 3.8 QUIET)
+find_package(PythonLibs 3.8 QUIET)
+
+# Variables are tested by name (no ${}): an unset variable then evaluates to
+# false instead of expanding to nothing and producing a malformed if().
+if(NOT PYTHONINTERP_FOUND)
+  message(STATUS "Build dalgona: FAILED (Python3 is missing)")
+  return()
+endif()
+
+# Compare the full version so that the major number is taken into account too
+if(PYTHON_VERSION_STRING VERSION_LESS 3.8)
+  message(STATUS "Build dalgona: FAILED (Install Python version higher than or equal to 3.8)")
+  return()
+endif()
+
+# dalgona compiles against PYTHON_INCLUDE_DIRS and links PYTHON_LIBRARIES, so
+# skip the build cleanly when the development package was not found.
+if(NOT PYTHONLIBS_FOUND)
+  message(STATUS "Build dalgona: FAILED (Python3 development library is missing)")
+  return()
+endif()
+
+# Pybind11 is required; skip building dalgona when it cannot be found
+nnas_find_package(Pybind11)
+if(NOT Pybind11_FOUND)
+ message(STATUS "Build dalgona: FAILED (Pybind11 is missing)")
+ return()
+endif(NOT Pybind11_FOUND)
+
+set(DRIVER "driver/Driver.cpp")
+
+# SOURCES = every src/*.cpp except the unit tests (*.test.cpp), which go to TESTS
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_compile_options(-fvisibility=hidden)
+
+add_executable(dalgona ${DRIVER} ${SOURCES})
+target_include_directories(dalgona PRIVATE include)
+target_include_directories(dalgona PRIVATE ${PYTHON_INCLUDE_DIRS})
+target_include_directories(dalgona PRIVATE ${Pybind11_INCLUDE_DIRS})
+
+# NOTE(review): INTERFACE link items apply to consumers of a target, not to the
+# target itself, so pybind11::embed is not linked into dalgona by this line;
+# the Python library is linked through PYTHON_LIBRARIES below. Confirm intent.
+target_link_libraries(dalgona INTERFACE pybind11::embed)
+target_link_libraries(dalgona PRIVATE ${PYTHON_LIBRARIES})
+target_link_libraries(dalgona PRIVATE arser)
+target_link_libraries(dalgona PRIVATE safemain)
+target_link_libraries(dalgona PRIVATE foder)
+target_link_libraries(dalgona PRIVATE luci_import)
+target_link_libraries(dalgona PRIVATE luci_interpreter)
+target_link_libraries(dalgona PRIVATE dio_hdf5)
+target_link_libraries(dalgona PRIVATE nncc_common)
+
+install(TARGETS dalgona DESTINATION bin)
+
+# Everything below is only needed when tests are enabled
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# dalgona is executable, so we do not link it to the test.
+# Instead, we use TEST_SOURCES to specify sources used for tests.
+set(TEST_SOURCES
+ "src/StringUtils.cpp"
+ "src/RandomUtils.cpp")
+
+nnas_find_package(GTest REQUIRED)
+GTest_AddTest(dalgona_unit_test ${TESTS} ${TEST_SOURCES})
+target_include_directories(dalgona_unit_test PRIVATE src)
+target_link_libraries(dalgona_unit_test luci_lang)
--- /dev/null
+# dalgona
+
+## What is dalgona?
+
+_dalgona_ is a tool for dynamic analysis of deep neural networks.
+
+## How does it work?
+
+_dalgona_ runs a user's custom analysis code (written in "Python") while performing inference. The analysis code has the form of hooks, called before/after each operator is executed. Intermediate execution results (values of activations) are passed to the hooks, so users can analyze the distribution of activations inside the hooks. The analysis result can be exported as files, log messages or any other forms, used for various purposes (model compression, optimization, etc.).
+
+NOTE Inference is performed by `luci-interpreter`.
+
+## Possible applications
+- Finding quantization parameters based on the distribution of activations
+- Finding sparse activations by observing the portion of zero values
+- Finding the distribution of conditional variables in If-statement and While-statement
+- Visualization of activation data with Python libraries
+
+## Prerequisite
+- Python 3.8 (python3.8, python3.8-dev packages)
+- Circle model (target to analyze)
+- Input data of the model (hdf5 format. See _rawdata2hdf5_ or _gen_h5_explicit_inputs.py_ for more details.)
+- Analysis code (Python code)
+
+## Example
+```
+dalgona \
+ --input_model model.circle \
+ --input_data data.h5 \
+ --analysis analysis/AnalysisTemplate.py
+```
+
+## Arguments
+```
+ --help Show help message and exit
+ --input_model Input model filepath (.circle)
+ --input_data Input data filepath (.h5) (if not given, random data will be used)
+ --analysis Analysis code filepath (.py)
+ --analysis_args (optional) String argument passed to the analysis code
+```
+
+## How to write analysis code?
+
+_dalgona_ provides hooks which are called before/after an operator is executed.
+Users can access tensors relevant to the corresponding operator inside the hooks.
+The information of each operator is passed as the arguments of the hook.
+For example, for a Conv2D operator, _dalgona_ provides following hooks.
+
+```
+ def Conv2DPre(self, name, input, filter, bias, padding, stride, dilation, fused_act)
+ def Conv2DPost(self, name, input, filter, bias, padding, stride, dilation, output, fused_act)
+```
+
+`Conv2DPre`/`Conv2DPost` are called before/after Conv2D is executed, respectively. Users can write codes to analyze the distribution of intermediate tensors using the provided arguments.
+
+(Note that Conv2DPost has one more argument "output", which is the execution result of the operator)
+
+Details about the arguments of each hook can be found in the section "Arguments of Hooks".
+
+We provide a template for the analysis code in `analysis/AnalysisTemplate.py`. Users can copy the template file and modify it to write their custom analysis code.
+
+| List of hooks | Explanation |
+| --------------|------------ |
+| StartAnalysis(self, args) | Called when the analysis starts (`args` is the string given by `--analysis_args`) |
+| EndAnalysis(self) | Called when the analysis ends |
+| StartNetworkExecution(self, inputs) | Called when the execution of a network starts |
+| EndNetworkExecution(self, outputs) | Called when the execution of a network ends |
+| DefaultOpPre(self, name, opcode, inputs) | Default hook called before an operator is executed |
+| DefaultOpPost(self, name, opcode, inputs, output) | Default hook called after an operator is executed |
+| \<OPCODE\>Pre/Post | Hooks called before/after the corresponding operator is executed. |
+
+## Arguments of Hooks
+
+Arguments are implemented with built-in Python types.
+
+Tensor
+- Type: dict
+- {name:str, data: np.ndarray, quantparam: QuantParam, is_const: bool}
+
+QuantParam
+- Type: dict
+- {scale: list, zero_point: list, quantized_dimension: int}
+
+Padding
+- Type: string
+- Values: 'SAME', 'VALID'
+
+Stride
+- Type: dict
+- {w: int, h: int}
+
+Dilation
+- Type: dict
+- {w: int, h: int}
+
+FusedActivationFunction
+- Type: string
+- Values: 'none', 'relu', 'relu_n1_to_1', 'relu6'
+
+## What's different from Hook APIs in Tensorflow or Pytorch?
+
+Basically, dalgona works in the same way as Hooks in TF or Pytorch. It calls user-defined functions before/after each operator is executed.
+
+A major difference is that dalgona runs with a model designed for inference (i.e., circle, which can be directly converted from tflite).
--- /dev/null
+# Copyright 2022 Samsung Electronics Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+A template for analysis code.
+This template shows how to access the information of each operator inside hooks.
+Users can write their own hooks by modifying this file.
+
+NOTE See "Arguments of Hooks" section in README to understand argument types (Tensor, Stride, ..)
+NOTE See "tests/SingleOperatorTest.py" for more operators.
+"""
+
+
+class AnalysisTemplate(object):
+ """
+ Example analysis class: every hook only prints the arguments it receives.
+ """
+ def StartAnalysis(self, args: str):
+ """
+ Called when the analysis starts
+ args: string given by --analysis_args option
+ """
+ print("Analysis started.")
+ print("args", args)
+
+ def EndAnalysis(self):
+ """
+ Called when the analysis ends
+ """
+ print("Analysis ended.")
+
+ def StartNetworkExecution(self, inputs: list):
+ """
+ Called when the execution of a network starts
+ inputs: list of Tensor
+ """
+ print("Network execution started.")
+
+ def EndNetworkExecution(self, outputs: list):
+ """
+ Called when the execution of a network ends
+ outputs: list of Tensor
+ """
+ print("Network execution ended.")
+
+ def DefaultOpPre(self, name: str, opcode: str, inputs: list):
+ """
+ Default hook called before an operator is executed
+ name: output tensor name (string)
+ opcode: opcode name (string)
+ inputs: list of Tensor
+ """
+ print("name", name)
+ print("opcode", opcode)
+ print("inputs", inputs)
+
+ def DefaultOpPost(self, name: str, opcode: str, inputs: list, output: dict):
+ """
+ Default hook called after an operator is executed
+ name: output tensor name (string)
+ opcode: opcode name (string)
+ inputs: list of Tensor
+ output: Tensor
+ """
+ print("name", name)
+ print("opcode", opcode)
+ print("inputs", inputs)
+ print("output", output)
+
+ def Conv2DPre(self, name: str, input: dict, filter: dict, bias: dict, padding: str,
+ stride: dict, dilation: dict, fused_act: str):
+ """
+ Called before Conv2D layer execution
+ name: output tensor name (string)
+ input: Tensor
+ filter: Tensor
+ bias: Tensor
+ padding: Padding (string)
+ stride: Stride
+ dilation: Dilation
+ fused_act: Fused activation functions (string)
+ """
+ print("name", name)
+ print("input", input)
+ print("filter", filter)
+ print("bias", bias)
+ print("padding", padding)
+ print("stride", stride)
+ print("dilation", dilation)
+ print("fused activation", fused_act)
+
+ def Conv2DPost(self, name: str, input: dict, filter: dict, bias: dict, padding: str,
+ stride: dict, dilation: dict, output: dict, fused_act: str):
+ """
+ Called after Conv2D layer execution
+ name: output tensor name (string)
+ input: Tensor
+ filter: Tensor
+ bias: Tensor
+ padding: Padding (string)
+ stride: Stride
+ dilation: Dilation
+ output: Tensor
+ fused_act: Fused activation functions (string)
+ """
+ print("name", name)
+ print("input", input)
+ print("filter", filter)
+ print("bias", bias)
+ print("padding", padding)
+ print("stride", stride)
+ print("dilation", dilation)
+ # output['data'] exposes shape/dtype (np.ndarray per README "Arguments of Hooks")
+ print("output shape", output['data'].shape)
+ print("output type", output['data'].dtype)
+ print("fused activation", fused_act)
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalgona.h"
+
+#include <arser/arser.h>
+#include <pybind11/embed.h>
+
+namespace py = pybind11;
+
+using namespace dalgona;
+
+// Command-line entry point of dalgona.
+// Parses the options, loads the model, then runs the analysis with either the
+// given HDF5 input data or randomly generated input.
+// Returns EXIT_FAILURE when argument parsing fails and EXIT_SUCCESS otherwise.
+// Exceptions thrown after parsing are not caught here.
+// NOTE(review): presumably the safemain wrapper reports them - confirm.
+int entry(const int argc, char **argv)
+{
+ arser::Arser arser("Dalgona: Dynamic analysis tool for DNN");
+
+ arser.add_argument("--input_model")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(true)
+ .help("Input model filepath (.circle)");
+
+ arser.add_argument("--input_data")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Input data filepath (.h5) (if not given, random data will be used)");
+
+ arser.add_argument("--analysis")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(true)
+ .help("Analysis code filepath (.py)");
+
+ arser.add_argument("--analysis_args")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("String argument passed to the analysis code");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ // Print the parse error followed by the usage text
+ std::cout << err.what() << std::endl;
+ std::cout << arser;
+ return EXIT_FAILURE;
+ }
+
+ auto input_model_path = arser.get<std::string>("--input_model");
+ auto analysis_path = arser.get<std::string>("--analysis");
+ // --analysis_args is optional; an empty string is passed when it is absent
+ std::string analysis_args = "";
+ if (arser["--analysis_args"])
+ analysis_args = arser.get<std::string>("--analysis_args");
+
+ // Initialize python interpreter
+ // `guard` is declared before `dalgona`, so `dalgona` is destroyed first and
+ // the interpreter outlives it.
+ py::scoped_interpreter guard{};
+
+ Dalgona dalgona;
+
+ // Initialize interpreter and operator hooks
+ dalgona.initialize(input_model_path);
+
+ // Run analysis
+ if (arser["--input_data"])
+ {
+ const auto input_data_path = arser.get<std::string>("--input_data");
+ dalgona.runAnalysisWithH5Input(input_data_path, analysis_path, analysis_args);
+ }
+ else
+ {
+ std::cout << "--input_data was not specified. Run with a random input." << std::endl;
+ dalgona.runAnalysisWithRandomInput(analysis_path, analysis_args);
+ }
+
+ return EXIT_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_H__
+#define __DALGONA_H__
+
+#include <luci/IR/Module.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include "PythonHooks.h"
+
+#include <memory>
+
+namespace dalgona
+{
+
+// Owns the imported module, the interpreter that executes it and the Python
+// hooks observing the execution. initialize() must be called before either
+// runAnalysis* method, since they dereference the members it creates.
+class Dalgona
+{
+public:
+ explicit Dalgona() = default;
+
+ ~Dalgona() = default;
+
+ // Load the circle model at input_model_path and set up interpreter and hooks
+ void initialize(const std::string &input_model_path);
+
+ // Run analysis with hdf5 input
+ void runAnalysisWithH5Input(const std::string &input_data_path, const std::string &analysis_path,
+ const std::string &analysis_args);
+
+ // Run analysis with random input
+ void runAnalysisWithRandomInput(const std::string &analysis_path,
+ const std::string &analysis_args);
+
+private:
+ // Declaration order matters: members are destroyed in reverse order, so the
+ // hooks go first, then the interpreter, then the module. The interpreter is
+ // built from a raw pointer to the module and the hooks from a raw pointer to
+ // the interpreter.
+ std::unique_ptr<luci::Module> _module{nullptr};
+ std::unique_ptr<luci_interpreter::Interpreter> _interpreter{nullptr};
+ std::unique_ptr<PythonHooks> _hooks{nullptr};
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_PYTHON_HOOKS_H__
+#define __DALGONA_PYTHON_HOOKS_H__
+
+#include <loco/IR/Graph.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <pybind11/embed.h>
+
+#include <string>
+
+namespace py = pybind11;
+
+namespace dalgona
+{
+
+// Execution observer that forwards interpreter events to a Python analysis object.
+class PythonHooks : public luci_interpreter::ExecutionObserver
+{
+public:
+  // `interpreter` is stored as a non-owning pointer.
+  // The constructor is explicit so that an Interpreter pointer is never
+  // converted to PythonHooks implicitly.
+  explicit PythonHooks(luci_interpreter::Interpreter *interpreter) : _interpreter(interpreter)
+  {
+    // Do nothing
+  }
+
+  // Called when the analysis starts
+  void importAnalysis(const std::string &analysis_path, py::object &globals,
+                      const std::string &analysis_args);
+
+  // Called after the analysis is done
+  void endAnalysis();
+
+  // Called before a network is started to be executed
+  void startNetworkExecution(loco::Graph *graph);
+
+  // Called after a network is executed
+  void endNetworkExecution(loco::Graph *graph);
+
+  // Called before an operator is executed
+  void preOperatorExecute(const luci::CircleNode *node) override;
+
+  // Called after an operator is executed
+  void postOperatorExecute(const luci::CircleNode *node) override;
+
+private:
+  // Not owned by this class
+  luci_interpreter::Interpreter *_interpreter = nullptr;
+  // Python object holding the user's analysis
+  // NOTE(review): presumably assigned by importAnalysis() - confirm in PythonHooks.cpp
+  py::object _analysis;
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_PYTHON_HOOKS_H__
--- /dev/null
+# Components this directory declares via require().
+# NOTE(review): presumably mirrors the libraries linked in CMakeLists.txt - confirm.
+require("safemain")
+require("arser")
+require("foder")
+require("luci")
+require("luci-interpreter")
+require("dio-hdf5")
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalgona.h"
+#include "PythonHooks.h"
+#include "RandomUtils.h"
+
+#include <luci/Importer.h>
+#include <foder/FileLoader.h>
+#include <dio_hdf5/HDF5Importer.h>
+
+#include <pybind11/embed.h>
+
+#include <iostream>
+#include <limits>
+
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
+
+namespace py = pybind11;
+
+namespace
+{
+
+// Return the product of all dimension values of `node` (1 for a rank-0 node)
+uint32_t numElements(const luci::CircleNode *node)
+{
+  assert(node != nullptr); // FIX_CALLER_UNLESS
+
+  const uint32_t rank = node->rank();
+  uint32_t count = 1;
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    count *= node->dim(axis).value();
+  }
+
+  return count;
+}
+
+// Return tensor's size in bytes: element size of the dtype times element count
+template <typename NodeT> size_t getByteSize(const NodeT *node)
+{
+  assert(node != nullptr); // FIX_CALLER_UNLESS
+
+  const uint32_t bytes_per_element = loco::size(node->dtype());
+  const uint32_t element_count = numElements(node);
+
+  // Widen both operands before multiplying
+  return static_cast<size_t>(bytes_per_element) * static_cast<size_t>(element_count);
+}
+
+// Validate the shape of a graph input.
+// Throws std::runtime_error when
+// 1. any dimension is unknown, or
+// 2. the number of elements is 0
+void checkInputDimension(const luci::CircleInput *input)
+{
+  assert(input != nullptr); // FIX_CALLER_UNLESS
+
+  const uint32_t rank = input->rank();
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    if (input->dim(axis).known())
+      continue;
+
+    throw std::runtime_error(input->name() + " has unknown dimension");
+  }
+
+  if (numElements(input) != 0)
+    return;
+
+  throw std::runtime_error(input->name() + " is a zero-sized input");
+}
+
+// Compare `dtype` and `shape` with the type and shape of `input_node`.
+// Throws std::runtime_error on a type, rank or dimension mismatch.
+void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
+{
+  assert(input_node != nullptr); // FIX_CALLER_UNLESS
+
+  // Type check
+  const bool same_type = (dtype == input_node->dtype());
+  if (not same_type)
+    throw std::runtime_error("Wrong input type.");
+
+  // Rank check
+  const auto rank = shape.size();
+  if (rank != input_node->rank())
+    throw std::runtime_error("Input rank mismatch.");
+
+  // Per-dimension check
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    if (shape.at(axis) == input_node->dim(axis))
+      continue;
+
+    throw std::runtime_error("Input shape mismatch.");
+  }
+}
+
+} // namespace
+
+namespace dalgona
+{
+
+// Load and verify the circle file at `input_model_path`, import it as a
+// luci::Module, then create the interpreter and attach the Python hooks to it.
+// Throws std::runtime_error when verification, loading or import fails.
+void Dalgona::initialize(const std::string &input_model_path)
+{
+ // Load model from the file
+ foder::FileLoader loader{input_model_path};
+ std::vector<char> model_data = loader.load();
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
+ model_data.size()};
+ if (not circle::VerifyModelBuffer(verifier))
+ throw std::runtime_error("Failed to verify circle '" + input_model_path + "'");
+
+ // circle_model points into model_data, which lives only until this function returns
+ auto circle_model = circle::GetModel(model_data.data());
+
+ if (not circle_model)
+ throw std::runtime_error("Failed to load '" + input_model_path + "'");
+
+ _module = luci::Importer().importModule(circle_model);
+
+ if (not _module)
+ throw std::runtime_error("ERROR: Failed to load '" + input_model_path + "'");
+
+ // Initialize interpreter
+ _interpreter = std::make_unique<luci_interpreter::Interpreter>(_module.get());
+
+ // The hooks receive a raw pointer to the interpreter and are registered as its observer
+ _hooks = std::make_unique<PythonHooks>(_interpreter.get());
+
+ _interpreter->attachObserver(_hooks.get());
+}
+
+// Run the analysis once per record stored in the HDF5 file `input_data_path`.
+// For every record, each graph input is read from the file and written to the
+// interpreter before the network is interpreted between the start/end hooks.
+// Throws std::runtime_error when the file has no record, when the number of
+// inputs, type or shape does not match the model, or on an HDF5 error.
+void Dalgona::runAnalysisWithH5Input(const std::string &input_data_path,
+ const std::string &analysis_path,
+ const std::string &analysis_args)
+{
+ // Import the analysis code using the __main__ module's __dict__ as its scope
+ py::object scope = py::module::import("__main__").attr("__dict__");
+ _hooks->importAnalysis(analysis_path, scope, analysis_args);
+
+ try
+ {
+ dio::hdf5::HDF5Importer importer(input_data_path);
+ importer.importGroup("value");
+
+ bool is_raw_data = importer.isRawData();
+
+ const auto num_records = importer.numData();
+ if (num_records == 0)
+ throw std::runtime_error("The input data file does not contain any record.");
+
+ const auto input_nodes = loco::input_nodes(_module->graph());
+ const auto num_inputs = input_nodes.size();
+
+ for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
+ {
+ if (num_inputs != static_cast<uint32_t>(importer.numInputs(record_idx)))
+ throw std::runtime_error("Wrong number of inputs.");
+
+ std::cout << "Running " << record_idx << "'th data" << std::endl;
+
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+ checkInputDimension(input_node);
+ // Buffer sized from the model's input shape and dtype
+ std::vector<char> input_data(getByteSize(input_node));
+
+ if (is_raw_data)
+ {
+ // Skip type/shape check for raw data
+ importer.readTensor(record_idx, input_idx, input_data.data());
+ }
+ else
+ {
+ DataType dtype;
+ Shape shape;
+ importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data());
+
+ // Check the type and the shape of the input data is valid
+ verifyTypeShape(input_node, dtype, shape);
+ }
+
+ _interpreter->writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ _hooks->startNetworkExecution(_module->graph());
+ _interpreter->interpret();
+ _hooks->endNetworkExecution(_module->graph());
+ }
+
+ // NOTE: the value printed here is the total record count, not an index
+ std::cout << "Finished executing " << num_records << "'th data" << std::endl;
+ _hooks->endAnalysis();
+ }
+ catch (const H5::Exception &e)
+ {
+ // Print the HDF5 error stack, then rethrow as std::runtime_error
+ H5::Exception::printErrorStack();
+ throw std::runtime_error("HDF5 error occurred.");
+ }
+}
+
+// Run the loaded model once on randomly generated inputs, invoking the Python analysis
+// hooks around the execution.
+//
+// analysis_path, analysis_args: forwarded to PythonHooks::importAnalysis
+//
+// Throws std::runtime_error when an input node has a data type other than
+// FLOAT32, U8, S16, S32, S64 or BOOL.
+void Dalgona::runAnalysisWithRandomInput(const std::string &analysis_path,
+ const std::string &analysis_args)
+{
+ // Import the analysis script, handing it the __main__ dictionary as its globals
+ py::object scope = py::module::import("__main__").attr("__dict__");
+ _hooks->importAnalysis(analysis_path, scope, analysis_args);
+
+ const auto input_nodes = loco::input_nodes(_module->graph());
+ const auto num_inputs = input_nodes.size();
+
+ // Fill every graph input with random data of the matching element type
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+ checkInputDimension(input_node);
+
+ uint32_t num_elems = numElements(input_node);
+ switch (input_node->dtype())
+ {
+ case DataType::FLOAT32:
+ {
+ // Synced with record-minmax (-5,5)
+ auto input_data = genRandomFloatData(num_elems, -5, 5);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(float));
+ break;
+ }
+ case DataType::U8:
+ {
+ // Full uint8_t range
+ auto input_data = genRandomIntData<uint8_t>(num_elems, std::numeric_limits<uint8_t>::min(),
+ std::numeric_limits<uint8_t>::max());
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(uint8_t));
+ break;
+ }
+ case DataType::S16:
+ {
+ // Full int16_t range
+ auto input_data = genRandomIntData<int16_t>(num_elems, std::numeric_limits<int16_t>::min(),
+ std::numeric_limits<int16_t>::max());
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int16_t));
+ break;
+ }
+ case DataType::S32:
+ {
+ // Synced with record-minmax (0, 100)
+ auto input_data = genRandomIntData<int32_t>(num_elems, 0, 100);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int32_t));
+ break;
+ }
+ case DataType::S64:
+ {
+ // Synced with record-minmax (0, 100)
+ auto input_data = genRandomIntData<int64_t>(num_elems, 0, 100);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int64_t));
+ break;
+ }
+ case DataType::BOOL:
+ {
+ // Bool is represented as uint8 (0 or 1)
+ auto input_data = genRandomIntData<uint8_t>(num_elems, 0, 1);
+ _interpreter->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(uint8_t));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported input data type in " + input_node->name());
+ }
+ }
+
+ // Run one inference, bracketed by the network-level hooks
+ _hooks->startNetworkExecution(_module->graph());
+ _interpreter->interpret();
+ _hooks->endNetworkExecution(_module->graph());
+
+ std::cout << "Finished executing a random input" << std::endl;
+ _hooks->endAnalysis();
+}
+
+} // namespace dalgona
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_POST_OPERATOR_HOOK_H__
+#define __DALGONA_POST_OPERATOR_HOOK_H__
+
+#include "Utils.h"
+#include "StringUtils.h"
+
+#include <loco/IR/Node.h>
+#include <luci_interpreter/Interpreter.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <pybind11/embed.h>
+#include <vector>
+
+namespace py = pybind11;
+using namespace py::literals;
+
+namespace dalgona
+{
+
+// Invoke a user-written Python hook after an operator is executed.
+//
+// Each visit() overload looks up "<Op>Post" on the analysis object and calls it with the
+// operator's inputs, attributes and output(s). When that attribute is missing, the overload
+// falls back to visit(const luci::CircleNode *), which calls "DefaultOpPost" if it exists
+// and does nothing otherwise.
+class PostOperatorHook final : public luci::CircleNodeVisitor<void>
+{
+
+// This macro creates three variables used for post-operator hooks.
+// 1. hook: Python function to be invoked (type: py::object)
+// 2. inputs: input data (type: std::vector<py::dict>, built by inputsPyArray)
+// 3. output: output data (type: py::dict, built by outputPyArray)
+#define POST_OPERATOR_HOOK_PROLOGUE(OP_NAME)                \
+  if (!py::hasattr(_analysis, #OP_NAME "Post"))             \
+  {                                                         \
+    visit(loco::must_cast<const luci::CircleNode *>(node)); \
+    return;                                                 \
+  }                                                         \
+  py::object hook = _analysis.attr(#OP_NAME "Post");        \
+  auto inputs = inputsPyArray(node, _interpreter);          \
+  auto output = outputPyArray(node, _interpreter);
+
+// Multi-output version of POST_OPERATOR_HOOK_PROLOGUE
+// (creates "outputs", a std::vector<py::dict> built by outputsPyArray, instead of "output")
+#define POST_OPERATOR_HOOK_PROLOGUE_MULTI_OUTS(OP_NAME)     \
+  if (!py::hasattr(_analysis, #OP_NAME "Post"))             \
+  {                                                         \
+    visit(loco::must_cast<const luci::CircleNode *>(node)); \
+    return;                                                 \
+  }                                                         \
+  py::object hook = _analysis.attr(#OP_NAME "Post");        \
+  auto inputs = inputsPyArray(node, _interpreter);          \
+  auto outputs = outputsPyArray(node, _interpreter);
+
+private:
+  py::object _analysis;
+  luci_interpreter::Interpreter *_interpreter{nullptr};
+
+  // Return inputs[idx], or Python None when that operand is absent.
+  // inputsPyArray() drops CircleOutputExclude operands (e.g. a non-existing bias), so the
+  // vector may hold fewer entries than the operator has arguments; indexing it blindly
+  // would read out of bounds.
+  static py::object inputOrNone(const std::vector<py::dict> &inputs, uint32_t idx)
+  {
+    if (idx < inputs.size())
+      return inputs[idx];
+    return none();
+  }
+
+public:
+  explicit PostOperatorHook(py::object analysis, luci_interpreter::Interpreter *interpreter)
+    : _analysis(analysis), _interpreter(interpreter)
+  {
+    // Do nothing
+  }
+
+  // default: used for operators without a dedicated "<Op>Post" hook
+  void visit(const luci::CircleNode *node)
+  {
+    if (not py::hasattr(_analysis, "DefaultOpPost"))
+      return;
+
+    py::object hook = _analysis.attr("DefaultOpPost");
+    auto inputs = inputsPyArray(node, _interpreter);
+    auto output = outputPyArray(node, _interpreter);
+
+    py::list input_list;
+    for (uint32_t i = 0; i < inputs.size(); i++)
+    {
+      input_list.append(inputs[i]);
+    }
+
+    pySafeCall(hook,
+               node->name(),             // name
+               toString(node->opcode()), // opcode
+               input_list,               // list of inputs
+               output                    // output
+    );
+  }
+
+  void visit(const luci::CircleConv2D *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(Conv2D)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+    auto dilation = node->dilation();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+    auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[0],                                         // input
+               inputs[1],                                         // filter
+               inputOrNone(inputs, 2),                            // bias (None if absent)
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride,                                         // stride
+               py_dilation,                                       // dilation
+               output,                                            // output
+               toString(fused_act)                                // fused activation
+    );
+  }
+
+  void visit(const luci::CircleDepthwiseConv2D *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(DepthwiseConv2D)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+    auto dilation = node->dilation();
+    auto depthMultiplier = node->depthMultiplier();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+    auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[0],                                         // input
+               inputs[1],                                         // filter
+               inputOrNone(inputs, 2),                            // bias (None if absent)
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride,                                         // stride
+               depthMultiplier,                                   // depthMultiplier
+               py_dilation,                                       // dilation
+               output,                                            // output
+               toString(fused_act)                                // fused activation
+    );
+  }
+
+  void visit(const luci::CircleAdd *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(Add)
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),       // name
+               inputs[0],          // x
+               inputs[1],          // y
+               output,             // output
+               toString(fused_act) // fused activation
+    );
+  }
+
+  void visit(const luci::CircleFullyConnected *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(FullyConnected)
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),           // name
+               inputs[0],              // input
+               inputs[1],              // weights
+               inputOrNone(inputs, 2), // bias (None if absent)
+               output,                 // output
+               toString(fused_act)     // fused activation
+    );
+  }
+
+  void visit(const luci::CircleTransposeConv *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(TransposeConv)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[2],                                         // input
+               inputs[1],                                         // filter
+               inputs[0],                                         // output shape
+               inputOrNone(inputs, 3),                            // bias (None if absent)
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride,                                         // stride
+               output                                             // output
+    );
+  }
+
+  void visit(const luci::CircleInstanceNorm *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE(InstanceNorm)
+
+    auto epsilon = node->epsilon();
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),       // name
+               inputs[0],          // input
+               inputs[1],          // gamma
+               inputs[2],          // beta
+               epsilon,            // epsilon
+               output,             // output
+               toString(fused_act) // fused activation
+    );
+  }
+
+  void visit(const luci::CircleSplit *node)
+  {
+    POST_OPERATOR_HOOK_PROLOGUE_MULTI_OUTS(Split)
+
+    py::list output_list;
+    for (uint32_t i = 0; i < outputs.size(); i++)
+    {
+      output_list.append(outputs[i]);
+    }
+
+    auto num_split = node->num_split();
+
+    pySafeCall(hook,
+               node->name(), // name
+               inputs[0],    // split_dim
+               inputs[1],    // input
+               num_split,    // num_split
+               output_list   // list of outputs
+    );
+  }
+
+// Keep the helper macros local to this class so they do not leak into including files
+#undef POST_OPERATOR_HOOK_PROLOGUE
+#undef POST_OPERATOR_HOOK_PROLOGUE_MULTI_OUTS
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_POST_OPERATOR_HOOK_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_PRE_OPERATOR_HOOK_H__
+#define __DALGONA_PRE_OPERATOR_HOOK_H__
+
+#include "Utils.h"
+#include "StringUtils.h"
+
+#include <loco/IR/Node.h>
+#include <luci_interpreter/Interpreter.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <pybind11/embed.h>
+#include <vector>
+
+namespace py = pybind11;
+using namespace py::literals;
+
+namespace dalgona
+{
+
+// Invoke a user-written Python hook before an operator is executed.
+//
+// Each visit() overload looks up "<Op>Pre" on the analysis object and calls it with the
+// operator's inputs and attributes. When that attribute is missing, the overload falls back
+// to visit(const luci::CircleNode *), which calls "DefaultOpPre" if it exists and does
+// nothing otherwise.
+class PreOperatorHook final : public luci::CircleNodeVisitor<void>
+{
+
+// This macro creates two variables used for pre-operator hooks.
+// 1. hook: Python function to be invoked (type: py::object)
+// 2. inputs: input data (type: std::vector<py::dict>, built by inputsPyArray)
+#define PRE_OPERATOR_HOOK_PROLOGUE(OP_NAME)                 \
+  if (!py::hasattr(_analysis, #OP_NAME "Pre"))              \
+  {                                                         \
+    visit(loco::must_cast<const luci::CircleNode *>(node)); \
+    return;                                                 \
+  }                                                         \
+  py::object hook = _analysis.attr(#OP_NAME "Pre");         \
+  auto inputs = inputsPyArray(node, _interpreter);
+
+private:
+  py::object _analysis;
+  luci_interpreter::Interpreter *_interpreter{nullptr};
+
+  // Return inputs[idx], or Python None when that operand is absent.
+  // inputsPyArray() drops CircleOutputExclude operands (e.g. a non-existing bias), so the
+  // vector may hold fewer entries than the operator has arguments; indexing it blindly
+  // would read out of bounds.
+  static py::object inputOrNone(const std::vector<py::dict> &inputs, uint32_t idx)
+  {
+    if (idx < inputs.size())
+      return inputs[idx];
+    return none();
+  }
+
+public:
+  explicit PreOperatorHook(py::object analysis, luci_interpreter::Interpreter *interpreter)
+    : _analysis(analysis), _interpreter(interpreter)
+  {
+    // Do nothing
+  }
+
+  // default: used for operators without a dedicated "<Op>Pre" hook
+  void visit(const luci::CircleNode *node)
+  {
+    if (not py::hasattr(_analysis, "DefaultOpPre"))
+      return;
+
+    py::object hook = _analysis.attr("DefaultOpPre");
+    auto inputs = inputsPyArray(node, _interpreter);
+
+    py::list input_list;
+    for (uint32_t i = 0; i < inputs.size(); i++)
+    {
+      input_list.append(inputs[i]);
+    }
+
+    pySafeCall(hook,
+               node->name(),             // name
+               toString(node->opcode()), // opcode
+               input_list                // list of inputs
+    );
+  }
+
+  void visit(const luci::CircleConv2D *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(Conv2D)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+    auto dilation = node->dilation();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+    auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[0],                                         // input
+               inputs[1],                                         // filter
+               inputOrNone(inputs, 2),                            // bias (None if absent)
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride,                                         // stride
+               py_dilation,                                       // dilation
+               toString(fused_act)                                // fused activation
+    );
+  }
+
+  void visit(const luci::CircleDepthwiseConv2D *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(DepthwiseConv2D)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+    auto dilation = node->dilation();
+    auto depthMultiplier = node->depthMultiplier();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+    auto py_dilation = py::dict("w"_a = dilation->w(), "h"_a = dilation->h());
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[0],                                         // input
+               inputs[1],                                         // filter
+               inputOrNone(inputs, 2),                            // bias (None if absent)
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride,                                         // stride
+               depthMultiplier,                                   // depthMultiplier
+               py_dilation,                                       // dilation
+               toString(fused_act)                                // fused activation
+    );
+  }
+
+  void visit(const luci::CircleAdd *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(Add)
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),       // name
+               inputs[0],          // x
+               inputs[1],          // y
+               toString(fused_act) // fused activation
+    );
+  }
+
+  void visit(const luci::CircleFullyConnected *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(FullyConnected)
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),           // name
+               inputs[0],              // input
+               inputs[1],              // weights
+               inputOrNone(inputs, 2), // bias (None if absent)
+               toString(fused_act)     // fused activation
+    );
+  }
+
+  void visit(const luci::CircleTransposeConv *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(TransposeConv)
+
+    auto padding = node->padding();
+    auto stride = node->stride();
+
+    auto py_stride = py::dict("w"_a = stride->w(), "h"_a = stride->h());
+
+    pySafeCall(hook,
+               node->name(),                                      // name
+               inputs[2],                                         // input
+               inputs[1],                                         // filter
+               inputs[0],                                         // output shape
+               inputOrNone(inputs, 3),                            // bias (None if absent)
+               padding == luci::Padding::SAME ? "SAME" : "VALID", // padding
+               py_stride                                          // stride
+    );
+  }
+
+  void visit(const luci::CircleInstanceNorm *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(InstanceNorm)
+
+    auto epsilon = node->epsilon();
+
+    auto fused_act = node->fusedActivationFunction();
+
+    pySafeCall(hook,
+               node->name(),       // name
+               inputs[0],          // input
+               inputs[1],          // gamma
+               inputs[2],          // beta
+               epsilon,            // epsilon
+               toString(fused_act) // fused activation
+    );
+  }
+
+  void visit(const luci::CircleSplit *node)
+  {
+    PRE_OPERATOR_HOOK_PROLOGUE(Split)
+
+    auto num_split = node->num_split();
+
+    pySafeCall(hook,
+               node->name(), // name
+               inputs[0],    // split_dim
+               inputs[1],    // input
+               num_split     // num_split
+    );
+  }
+
+// Keep the helper macro local to this class so it does not leak into including files
+#undef PRE_OPERATOR_HOOK_PROLOGUE
+};
+
+} // namespace dalgona
+
+#endif // __DALGONA_PRE_OPERATOR_HOOK_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PythonHooks.h"
+#include "PostOperatorHook.h"
+#include "PreOperatorHook.h"
+#include "Utils.h"
+
+#include <loco/IR/Graph.h>
+
+namespace dalgona
+{
+
+// Dispatch `node` to a PreOperatorHook visitor bound to the current analysis object
+void PythonHooks::preOperatorExecute(const luci::CircleNode *node)
+{
+  PreOperatorHook pre_visitor{_analysis, _interpreter};
+  node->accept(&pre_visitor);
+}
+
+// Dispatch `node` to a PostOperatorHook visitor bound to the current analysis object
+void PythonHooks::postOperatorExecute(const luci::CircleNode *node)
+{
+  PostOperatorHook post_visitor{_analysis, _interpreter};
+  node->accept(&post_visitor);
+}
+
+// Import the user's analysis script and instantiate its analysis class.
+//
+// analysis_path: path to the Python file; its directory is appended to sys.path
+// globals: Python globals used when evaluating the import statements
+// analysis_args: passed to the analysis object's StartAnalysis(), when that method exists
+//
+// The script must define a class named after the file (MyAnalysis.py -> class MyAnalysis);
+// an instance of it is stored in _analysis.
+void PythonHooks::importAnalysis(const std::string &analysis_path, py::object &globals,
+ const std::string &analysis_args)
+{
+ // Strip the directory part (either '/' or '\\' separators)
+ const auto base_filename = analysis_path.substr(analysis_path.find_last_of("/\\") + 1);
+ // module name must be the same with the python code
+ // ex: base_filename = MyAnalysis.py -> module_name = MyAnalysis
+ const auto module_name = base_filename.substr(0, base_filename.find_last_of('.'));
+
+ py::dict locals;
+ locals["path"] = py::cast(analysis_path);
+
+ // NOTE(review): module_name is spliced into Python source text, so a file name that is not
+ // a valid Python identifier produces code that fails to evaluate
+ py::eval<py::eval_statements>("import sys\n"
+ "import os\n"
+ "sys.path.append(os.path.dirname(path))\n"
+ "import " +
+ module_name +
+ "\n"
+ "analysis = " +
+ module_name + "." + module_name + "()",
+ globals, locals);
+
+ // The evaluated statements bound the new instance to "analysis" in locals
+ _analysis = locals["analysis"];
+
+ if (py::hasattr(_analysis, "StartAnalysis"))
+ pySafeCall(_analysis.attr("StartAnalysis"), analysis_args);
+}
+
+// Call the analysis object's StartNetworkExecution hook (if defined) with the list of
+// model input tensors
+void PythonHooks::startNetworkExecution(loco::Graph *graph)
+{
+  // Nothing to do when the analysis does not define this hook
+  if (not py::hasattr(_analysis, "StartNetworkExecution"))
+    return;
+
+  assert(graph != nullptr); // FIX_CALLER_UNLESS
+
+  // Assumption: input_nodes is iterated in the same order of model inputs
+  py::list py_inputs;
+  for (const auto node : loco::input_nodes(graph))
+  {
+    auto circle_input = loco::must_cast<luci::CircleInput *>(node);
+    py_inputs.append(outputPyArray(circle_input, _interpreter));
+  }
+
+  pySafeCall(_analysis.attr("StartNetworkExecution"), py_inputs);
+}
+
+// Call the analysis object's EndNetworkExecution hook (if defined) with the list of
+// model output tensors
+void PythonHooks::endNetworkExecution(loco::Graph *graph)
+{
+  // Nothing to do when the analysis does not define this hook
+  if (not py::hasattr(_analysis, "EndNetworkExecution"))
+    return;
+
+  assert(graph != nullptr); // FIX_CALLER_UNLESS
+
+  // Assumption: output_nodes is iterated in the same order of model outputs
+  py::list py_outputs;
+  for (const auto node : loco::output_nodes(graph))
+  {
+    auto circle_output = loco::must_cast<luci::CircleOutput *>(node);
+    // The tensor of interest belongs to the node feeding the CircleOutput
+    auto producer = loco::must_cast<luci::CircleNode *>(circle_output->from());
+    py_outputs.append(outputPyArray(producer, _interpreter));
+  }
+
+  pySafeCall(_analysis.attr("EndNetworkExecution"), py_outputs);
+}
+
+// Call the analysis object's EndAnalysis hook when the script defines one
+void PythonHooks::endAnalysis()
+{
+  if (not py::hasattr(_analysis, "EndAnalysis"))
+    return;
+
+  pySafeCall(_analysis.attr("EndAnalysis"));
+}
+
+} // namespace dalgona
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RandomUtils.h"
+
+#include <random>
+#include <vector>
+#include <cassert>
+
+namespace dalgona
+{
+
+// Produce `num_elements` random floats drawn uniformly between `min` and `max`.
+// Throws std::invalid_argument when min > max.
+std::vector<float> genRandomFloatData(uint32_t num_elements, float min, float max)
+{
+  if (max < min)
+    throw std::invalid_argument("min is greater than max");
+
+  // Mersenne Twister seeded from the system entropy source
+  std::random_device seed_source;
+  std::mt19937 engine(seed_source());
+  std::uniform_real_distribution<> distribution(min, max);
+
+  std::vector<float> result(num_elements);
+  for (uint32_t i = 0; i < num_elements; ++i)
+    result[i] = static_cast<float>(distribution(engine));
+
+  return result;
+}
+
+} // namespace dalgona
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_RANDOM_UTILS_H__
+#define __DALGONA_RANDOM_UTILS_H__
+
+#include <cstdint>
+#include <random>
+#include <stdexcept>
+#include <type_traits>
+#include <vector>
+
+namespace dalgona
+{
+
+// Generate `num_elements` random integers uniformly distributed over [min, max].
+//
+// Throws std::invalid_argument when min > max.
+//
+// std::uniform_int_distribution is only specified for short, int, long, long long and their
+// unsigned counterparts; instantiating it with a 1-byte type such as uint8_t or int8_t is
+// undefined behavior. For such T the values are drawn from a (unsigned) short distribution
+// over the same range and converted back, which is lossless because every drawn value lies
+// within [min, max].
+template <typename T> std::vector<T> genRandomIntData(uint32_t num_elements, T min, T max)
+{
+  using ShortLike = typename std::conditional<std::is_signed<T>::value, short, unsigned short>::type;
+  using DistType = typename std::conditional<(sizeof(T) < sizeof(short)), ShortLike, T>::type;
+
+  if (min > max)
+    throw std::invalid_argument("min is greater than max");
+
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<DistType> dist(static_cast<DistType>(min),
+                                               static_cast<DistType>(max));
+  std::vector<T> buffer(num_elements);
+
+  // Write random data
+  for (auto &elem : buffer)
+    elem = static_cast<T>(dist(gen));
+
+  return buffer;
+}
+
+// Generate `num_elements` random floats between `min` and `max`.
+// Throws std::invalid_argument when min > max.
+std::vector<float> genRandomFloatData(uint32_t num_elements, float min, float max);
+
+} // namespace dalgona
+
+#endif // __DALGONA_RANDOM_UTILS_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "RandomUtils.h"
+
+#include <gtest/gtest.h>
+
+using namespace dalgona;
+
+TEST(DalgonaUtilTest, gen_random_int32)
+{
+  // Every generated value must lie inside the closed range [lower, upper]
+  const int32_t lower = -5;
+  const int32_t upper = 5;
+  const uint32_t count = 10;
+
+  const auto values = genRandomIntData<int32_t>(count, lower, upper);
+
+  EXPECT_EQ(count, values.size());
+  for (const auto value : values)
+  {
+    EXPECT_TRUE(lower <= value and value <= upper);
+  }
+}
+
+TEST(DalgonaUtilTest, gen_random_int32_NEG)
+{
+  // An inverted range (lower bound above upper bound) must be rejected
+  const int32_t lower = 5;
+  const int32_t upper = -5;
+  const uint32_t count = 10;
+
+  EXPECT_ANY_THROW(genRandomIntData<int32_t>(count, lower, upper));
+}
+
+TEST(DalgonaUtilTest, gen_random_float)
+{
+  // Every generated value must lie inside the closed range [lower, upper]
+  const float lower = -5;
+  const float upper = 5;
+  const uint32_t count = 10;
+
+  const auto values = genRandomFloatData(count, lower, upper);
+
+  EXPECT_EQ(count, values.size());
+  for (const auto value : values)
+  {
+    EXPECT_TRUE(lower <= value and value <= upper);
+  }
+}
+
+TEST(DalgonaUtilTest, gen_random_float_NEG)
+{
+  // An inverted range (lower bound above upper bound) must be rejected
+  const float lower = 5;
+  const float upper = -5;
+  const uint32_t count = 10;
+
+  EXPECT_ANY_THROW(genRandomFloatData(count, lower, upper));
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StringUtils.h"
+
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <cassert>
+
+namespace dalgona
+{
+
+// Convert an opcode to the node's class name without the leading "Circle"
+// (e.g. the opcode of CircleConv2D yields "Conv2D")
+const std::string toString(luci::CircleOpcode opcode)
+{
+  // Class names generated from CircleNodes.lst; the table is indexed by the opcode value
+  static const char *names[] = {
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+#undef CIRCLE_VNODE
+  };
+
+  const std::string class_name{names[static_cast<int>(opcode)]};
+
+  assert(class_name.compare(0, 6, "Circle") == 0); // FIX_ME_UNLESS
+
+  // Drop the 6-character "Circle" prefix
+  return class_name.substr(6);
+}
+
+// Convert a fused activation function to its lower-case name.
+// Throws std::runtime_error for a value that is not listed below.
+const std::string toString(luci::FusedActFunc fused_act)
+{
+  const char *name = nullptr;
+
+  switch (fused_act)
+  {
+    case luci::FusedActFunc::UNDEFINED:
+      name = "undefined";
+      break;
+    case luci::FusedActFunc::NONE:
+      name = "none";
+      break;
+    case luci::FusedActFunc::RELU:
+      name = "relu";
+      break;
+    case luci::FusedActFunc::RELU_N1_TO_1:
+      name = "relu_n1_to_1";
+      break;
+    case luci::FusedActFunc::RELU6:
+      name = "relu6";
+      break;
+    case luci::FusedActFunc::TANH:
+      name = "tanh";
+      break;
+    case luci::FusedActFunc::SIGN_BIT:
+      name = "sign_bit";
+      break;
+    default:
+      throw std::runtime_error("Unsupported activation function");
+  }
+
+  return std::string(name);
+}
+
+} // namespace dalgona
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_STRING_UTILS_H__
+#define __DALGONA_STRING_UTILS_H__
+
+#include <luci/IR/CircleOpcode.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <string>
+
+namespace dalgona
+{
+
+// Return the node class name for `opcode` with the leading "Circle" removed
+// (e.g. "Conv2D" for the opcode of CircleConv2D)
+const std::string toString(luci::CircleOpcode opcode);
+
+// Return the lower-case name of a fused activation function (e.g. "relu6");
+// throws std::runtime_error for an unsupported value
+const std::string toString(luci::FusedActFunc fused_act);
+
+} // namespace dalgona
+
+#endif // __DALGONA_STRING_UTILS_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StringUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <gtest/gtest.h>
+
+using namespace dalgona;
+
+TEST(DalgonaUtilTest, toString_basic)
+{
+  // The "Circle" prefix of the class name must be stripped
+  luci::CircleConv2D conv2d;
+  const auto opcode_name = toString(conv2d.opcode());
+
+  EXPECT_EQ("Conv2D", opcode_name);
+}
+
+TEST(DalgonaUtilTest, toString_fused_act_func)
+{
+  // One expectation per activation function handled by toString()
+  using Act = luci::FusedActFunc;
+
+  EXPECT_EQ("undefined", toString(Act::UNDEFINED));
+  EXPECT_EQ("none", toString(Act::NONE));
+  EXPECT_EQ("relu", toString(Act::RELU));
+  EXPECT_EQ("relu_n1_to_1", toString(Act::RELU_N1_TO_1));
+  EXPECT_EQ("relu6", toString(Act::RELU6));
+  EXPECT_EQ("tanh", toString(Act::TANH));
+  EXPECT_EQ("sign_bit", toString(Act::SIGN_BIT));
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Utils.h"
+#include "StringUtils.h"
+
+#include <luci_interpreter/core/Tensor.h>
+#include <luci/IR/CircleOpcode.h>
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <pybind11/numpy.h>
+#include <stdexcept>
+#include <vector>
+
+using Tensor = luci_interpreter::Tensor;
+
+namespace py = pybind11;
+using namespace py::literals;
+
+// Throw std::runtime_error(MSG) unless COND holds.
+// Wrapped in do { } while (0) so that the macro expands to exactly one statement: it needs
+// the usual trailing semicolon and cannot capture a following `else` when used as the body
+// of an unbraced if/else.
+#define THROW_UNLESS(COND, MSG)      \
+  do                                 \
+  {                                  \
+    if (not(COND))                   \
+      throw std::runtime_error(MSG); \
+  } while (0)
+
+namespace
+{
+
+// Build a numpy array from the shape and data of `tensor`.
+//
+// Throws std::runtime_error when a dimension is negative or when the element type is not
+// one of FLOAT32, S16, S32, S64, U8.
+py::array numpyArray(const Tensor *tensor)
+{
+ assert(tensor != nullptr); // FIX_CALLER_UNLESS
+
+ const auto tensor_shape = tensor->shape();
+
+ // Copy the dimensions and accumulate the total number of elements
+ // NOTE(review): `size` is a uint32_t product and is not checked for overflow
+ uint32_t size = 1;
+ std::vector<uint32_t> shape(tensor_shape.num_dims());
+ for (int i = 0; i < tensor_shape.num_dims(); i++)
+ {
+ THROW_UNLESS(tensor_shape.dim(i) >= 0, "Negative dimension detected in " + tensor->name());
+
+ shape[i] = tensor_shape.dim(i);
+ size *= shape[i];
+ }
+
+ // Tensors without elements are not wrapped in a typed array
+ // NOTE(review): py::none() is converted to py::array on return - confirm what the Python
+ // hook actually receives in this case
+ if (size == 0)
+ return py::none();
+
+ switch (tensor->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ return py::array_t<float, py::array::c_style>(shape, tensor->data<float>());
+ case loco::DataType::S16:
+ return py::array_t<int16_t, py::array::c_style>(shape, tensor->data<int16_t>());
+ case loco::DataType::S32:
+ return py::array_t<int32_t, py::array::c_style>(shape, tensor->data<int32_t>());
+ case loco::DataType::S64:
+ return py::array_t<int64_t, py::array::c_style>(shape, tensor->data<int64_t>());
+ case loco::DataType::U8:
+ return py::array_t<uint8_t, py::array::c_style>(shape, tensor->data<uint8_t>());
+ default:
+ throw std::runtime_error("Unsupported data type");
+ }
+}
+
+// Collect the quantization parameters of `tensor` into a Python dict with the keys
+// "scale", "zero_point" and "quantized_dimension"
+py::dict quantparam(const Tensor *tensor)
+{
+  assert(tensor != nullptr); // FIX_CALLER_UNLESS
+
+  py::list scales;
+  for (auto value : tensor->scales())
+    scales.append(value);
+
+  py::list zero_points;
+  for (auto value : tensor->zero_points())
+    zero_points.append(value);
+
+  return py::dict("scale"_a = scales, "zero_point"_a = zero_points,
+                  "quantized_dimension"_a = tensor->quantized_dimension());
+}
+
+} // namespace
+
+namespace dalgona
+{
+
+// Return Python's None as a generic py::object
+py::object none() { return py::none(); }
+
+// Return one py::dict ("name", "data", "quantparam", "is_const") per input of `node`.
+//
+// Inputs produced by CircleOutputExclude are skipped, so the result may hold fewer entries
+// than node->arity().
+// Throws std::runtime_error when the interpreter has no tensor for a non-skipped input.
+std::vector<py::dict> inputsPyArray(const luci::CircleNode *node,
+                                    luci_interpreter::Interpreter *interpreter)
+{
+  assert(node != nullptr);        // FIX_CALLER_UNLESS
+  assert(interpreter != nullptr); // FIX_CALLER_UNLESS
+
+  std::vector<py::dict> inputs;
+  for (uint32_t i = 0; i < node->arity(); ++i)
+  {
+    const auto input_tensor = interpreter->getTensor(node->arg(i));
+    auto circle_node = static_cast<luci::CircleNode *>(node->arg(i));
+
+    // skip invalid inputs (e.g., non-existing bias in TCONV)
+    if (circle_node->opcode() == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+      continue;
+
+    // numpyArray()/quantparam() dereference the tensor, so reject a missing one up front
+    // (same check as outputPyArray)
+    THROW_UNLESS(input_tensor != nullptr, "Null tensor detected in " + circle_node->name());
+
+    auto py_input =
+      py::dict("name"_a = circle_node->name(), "data"_a = numpyArray(input_tensor),
+               "quantparam"_a = quantparam(input_tensor),
+               "is_const"_a = circle_node->opcode() == luci::CircleOpcode::CIRCLECONST);
+    inputs.push_back(py_input);
+  }
+  return inputs;
+}
+
+// Return one py::dict ("name", "data", "quantparam", "is_const") per successor of the
+// multi-output node `node`.
+//
+// Throws std::runtime_error when a successor's opcode name does not end with "Out" or when
+// the interpreter has no tensor for it.
+std::vector<py::dict> outputsPyArray(const luci::CircleNode *node,
+                                     luci_interpreter::Interpreter *interpreter)
+{
+  assert(node != nullptr);        // FIX_CALLER_UNLESS
+  assert(interpreter != nullptr); // FIX_CALLER_UNLESS
+
+  std::vector<py::dict> outputs;
+  for (auto succ : loco::succs(node))
+  {
+    const auto output_tensor = interpreter->getTensor(succ);
+    auto circle_node = static_cast<luci::CircleNode *>(succ);
+
+    auto opcode_str = toString(circle_node->opcode());
+    // Check if node is a multi-output node
+    // Assumption: Multi-output virtual nodes have 'Out' suffix
+    // TODO Fix this if the assumption changes
+    // The length guard keeps `length() - 3` from wrapping around for short names
+    THROW_UNLESS(opcode_str.length() >= 3 and
+                   opcode_str.compare(opcode_str.length() - 3, 3, "Out") == 0,
+                 "Invalid output detected in " + node->name());
+
+    // numpyArray()/quantparam() dereference the tensor, so reject a missing one up front
+    // (same check as outputPyArray)
+    THROW_UNLESS(output_tensor != nullptr, "Null tensor detected in " + circle_node->name());
+
+    auto py_output =
+      py::dict("name"_a = circle_node->name(), "data"_a = numpyArray(output_tensor),
+               "quantparam"_a = quantparam(output_tensor),
+               "is_const"_a = circle_node->opcode() == luci::CircleOpcode::CIRCLECONST);
+    outputs.push_back(py_output);
+  }
+  return outputs;
+}
+
+// Describe the single output tensor of `node` as a py::dict with the keys
+// "name", "data", "quantparam" and "is_const".
+// Note: Only returns 1 output
+py::dict outputPyArray(const luci::CircleNode *node, luci_interpreter::Interpreter *interpreter)
+{
+  assert(node != nullptr);        // FIX_CALLER_UNLESS
+  assert(interpreter != nullptr); // FIX_CALLER_UNLESS
+
+  const auto out_tensor = interpreter->getTensor(node);
+  THROW_UNLESS(out_tensor != nullptr, "Null tensor detected in " + node->name());
+
+  return py::dict("name"_a = node->name(), "data"_a = numpyArray(out_tensor),
+                  "quantparam"_a = quantparam(out_tensor),
+                  "is_const"_a = node->opcode() == luci::CircleOpcode::CIRCLECONST);
+}
+
+} // namespace dalgona
+
+#undef THROW_UNLESS
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DALGONA_UTILS_H__
+#define __DALGONA_UTILS_H__
+
+#include <luci_interpreter/Interpreter.h>
+
+#include <pybind11/embed.h>
+
+namespace py = pybind11;
+
+namespace dalgona
+{
+
+// Call the Python callable `func` with `args`.
+// A Python exception raised by the call (py::error_already_set) is rethrown as
+// std::runtime_error carrying the same message.
+template <typename... Args> void pySafeCall(py::object func, Args... args)
+{
+  try
+  {
+    func(args...);
+  }
+  catch (const py::error_already_set &e) // the exception is only read, so catch it by const reference
+  {
+    throw std::runtime_error(e.what());
+  }
+}
+
+// Return the Tensor(py::dict) which corresponds to the single output of node
+// (throws std::runtime_error when the interpreter has no tensor for node)
+py::dict outputPyArray(const luci::CircleNode *node, luci_interpreter::Interpreter *interpreter);
+
+// Return a vector of Tensors(py::dict) which correspond to node's inputs
+// (inputs produced by CircleOutputExclude are left out)
+std::vector<py::dict> inputsPyArray(const luci::CircleNode *node,
+ luci_interpreter::Interpreter *interpreter);
+
+// Return a vector of Tensors(py::dict) which correspond to the outputs of multi-out node (ex:
+// SPLIT)
+std::vector<py::dict> outputsPyArray(const luci::CircleNode *node,
+ luci_interpreter::Interpreter *interpreter);
+
+// Return Python's None as a py::object
+py::object none();
+
+} // namespace dalgona
+
+#endif // __DALGONA_UTILS_H__
echo ${ACTUAL}
}
+# const_count <pattern>
+# Print the number of lines in the output of
+# `${INSPECT_PROG_PATH} --constants ${COMPILED_FILE}` that contain <pattern> as a whole word.
+const_count()
+{
+ argc_check $# 1
+ file_path_check ${COMPILED_FILE}
+ file_path_check ${INSPECT_PROG_PATH}
+
+ # Make a failure anywhere in the pipelines below visible through $?
+ set -o pipefail
+
+ # Dump the constants of the compiled model
+ RESULT=`init_error_log ; ${INSPECT_PROG_PATH} --constants ${COMPILED_FILE}`
+ check_success_exit_code $? 0
+
+ # note : grep's exit code is 2 in case of error.
+ # -w matches whole words only, -c prints the count of matching lines
+ ACTUAL=`init_error_log ; echo "${RESULT}" | grep -wc "$1"`
+ check_error_exit_code $? 2
+
+ echo ${ACTUAL}
+}
+
# TODO define more qullity test function
virtual GraphOutputIndex index(const Node *node) const = 0;
};
-// TODO Use "const Graph *"
std::vector<Node *> output_nodes(Graph *);
/**
auto pull = g->nodes()->create<loco::Pull>();
ASSERT_NO_THROW(g->nodes()->destroy(pull));
+}
+
+TEST(GraphTest, DISABLED_create_and_destroy_node_again)
+{
+ auto g = loco::make_graph();
+
+ auto pull = g->nodes()->create<loco::Pull>();
+
+ ASSERT_NO_THROW(g->nodes()->destroy(pull));
+ // NOTE calling destroy again raises Segmentation fault
+ // refer https://github.com/Samsung/ONE/issues/9968
+ // TODO fix this
ASSERT_THROW(g->nodes()->destroy(pull), std::invalid_argument);
}
ASSERT_FLOAT_EQ(test_node->f(), 11.11);
ASSERT_NO_THROW(g->nodes()->destroy(test_node));
- ASSERT_THROW(g->nodes()->destroy(test_node), std::invalid_argument);
}
TEST(GraphTest, getters_over_const_instance)
void execute_node(loco::TensorBroadcast *tensor_broadcast)
{
auto input_data = annot_data(tensor_broadcast->input());
+ if (input_data == nullptr)
+ {
+ throw std::runtime_error("Annotation is required for TensorBroadcast input");
+ }
// Calculate output shape
Shape input_shape = *(input_data->shape());
auto constant_node = pad->constant();
auto constant_data = annot_data(constant_node);
+ validate(constant_data != nullptr, "constant is not found");
validate(constant_data->dtype() == input_data->dtype(), "constant and input have same data type");
validate(constant_data->shape()->rank() == 1 && constant_data->shape()->dim(0) == 1,
"constant should have one rank with one dimension at zero axis");
--- /dev/null
+# luci-compute is a header-only (INTERFACE) target which needs three external
+# source trees, all of them at version 2.8.0.
+foreach(LUCI_COMPUTE_DEP TensorFlowSource TensorFlowGEMMLowpSource TensorFlowRuySource)
+  nnas_find_package(${LUCI_COMPUTE_DEP} EXACT 2.8.0 QUIET)
+endforeach()
+
+# Skip this directory as soon as one of the source trees is missing
+foreach(LUCI_COMPUTE_DEP TensorFlowSource TensorFlowGEMMLowpSource TensorFlowRuySource)
+  if(NOT ${LUCI_COMPUTE_DEP}_FOUND)
+    message(STATUS "Build luci-compute: FAILED (missing ${LUCI_COMPUTE_DEP} 2.8.0)")
+    return()
+  endif()
+endforeach()
+
+add_library(luci_compute INTERFACE)
+# SYSTEM keeps compiler warnings from the external headers out of the build
+target_include_directories(luci_compute SYSTEM INTERFACE
+  "${TensorFlowSource_DIR}"
+  "${TensorFlowGEMMLowpSource_DIR}"
+  "${TensorFlowRuySource_DIR}"
+)
--- /dev/null
+# luci-compute
+
+_luci-compute_ provides computation kernels for _luci_ and related modules.
)
add_executable(luci_eval_driver ${SRCS_EVAL_TESTER})
-target_link_libraries(luci_eval_driver PRIVATE oops)
-target_link_libraries(luci_eval_driver PRIVATE loco)
target_link_libraries(luci_eval_driver PRIVATE luci_import)
-target_link_libraries(luci_eval_driver PRIVATE luci_export)
-target_link_libraries(luci_eval_driver PRIVATE luci_lang)
target_link_libraries(luci_eval_driver PRIVATE luci_interpreter)
target_link_libraries(luci_eval_driver PRIVATE safemain)
-require("oops")
-require("loco")
require("luci")
require("luci-interpreter")
require("safemain")
#include <luci/ImporterEx.h>
#include <luci_interpreter/Interpreter.h>
-#include <luci/CircleExporter.h>
-#include <luci/CircleFileExpContract.h>
#include <cstdlib>
#include <fstream>
+#include <iostream>
#include <vector>
#include <string>
+REGISTER_KERNEL(Abs)
REGISTER_KERNEL(Add)
REGISTER_KERNEL(ArgMax)
REGISTER_KERNEL(AveragePool2D)
*/
#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
-#define LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+REGISTER_KERNEL(Abs)
REGISTER_KERNEL(Add)
REGISTER_KERNEL(ArgMax)
REGISTER_KERNEL(AveragePool2D)
REGISTER_KERNEL(PRelu)
REGISTER_KERNEL(Quantize)
REGISTER_KERNEL(ReduceMax)
+REGISTER_KERNEL(ReduceProd)
REGISTER_KERNEL(Relu)
REGISTER_KERNEL(Relu6)
REGISTER_KERNEL(Reshape)
REGISTER_KERNEL(Tanh)
REGISTER_KERNEL(Transpose)
REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(UnidirectionalSequenceLSTM)
REGISTER_KERNEL(Unpack)
REGISTER_KERNEL(While)
+REGISTER_KERNEL(Abs)
REGISTER_KERNEL(Add)
REGISTER_KERNEL(ArgMax)
REGISTER_KERNEL(AveragePool2D)
*/
#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
-#define LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
while (l < 32 && !_free_blocks[l])
l++;
- assert(l < 32);
+ if (l >= 32)
+ {
+ throw std::runtime_error{"Memory limit exceeded"};
+ }
Block *tmp;
tmp = _free_blocks[l];
int32_t stride_width;
};
+// Parameter bundle for the UnidirectionalSequenceLSTM kernel.
+// NOTE(review): the meaning of each field is not visible here; they presumably mirror the
+// options of the corresponding Circle/TFLite operator - confirm against the kernel.
+// Member order matters to any code which initializes this struct as an aggregate.
+struct UnidirectionalSequenceLSTMParams
+{
+ Activation activation;
+ float cell_clip;
+ float proj_clip;
+ bool time_major;
+ bool asymmetric_quantize_inputs;
+};
+
struct UnpackParams
{
int axis;
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Abs.h"
+
+#include "kernels/Utils.h"
+
+#include <cmath> // abs for float
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// One input tensor, one output tensor
+Abs::Abs(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Validate the element types and give the output the shape of the input
+void Abs::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  output()->resize(input()->shape());
+}
+
+// Write the absolute value of every input element to the output
+template <typename T> void Abs::eval() const
+{
+  const auto *src = input()->data<T>();
+  auto *dst = output()->data<T>();
+
+  const int count = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+
+  for (int remaining = count; remaining > 0; --remaining)
+  {
+    *dst++ = std::abs(*src++);
+  }
+}
+
+// Dispatch on the element type; FLOAT32 is the only type handled
+void Abs::execute() const
+{
+  if (input()->element_type() != DataType::FLOAT32)
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  eval<float>();
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ABS_H
+#define LUCI_INTERPRETER_KERNELS_ABS_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel declaration for the element-wise Abs operation: one input tensor, one output tensor.
+class Abs : public Kernel
+{
+public:
+ // input: tensor to read from, output: tensor to write to
+ Abs(const Tensor *input, Tensor *output);
+
+ // Accessors for the single input and the single output
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ // Kernel interface overrides
+ void configure() override;
+ void execute() const override;
+
+private:
+ // Typed implementation, T being the element type of the tensors
+ template <typename T> void eval() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ABS_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Abs.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Run Abs on a tensor built from input_shape/input_data and compare both the
+// resulting data and the resulting shape with the expectation.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<T> input_data, std::initializer_list<T> output_data)
+{
+  constexpr DataType dtype = getElementType<T>();
+  std::unique_ptr<IMemoryManager> mm = std::make_unique<TestMemoryManager>();
+
+  Tensor in = makeInputTensor<dtype>(input_shape, input_data, mm.get());
+  Tensor out = makeOutputTensor(dtype);
+
+  Abs abs_kernel(&in, &out);
+
+  // configure() resizes the output, so its memory is allocated afterwards
+  abs_kernel.configure();
+  mm->allocate_memory(out);
+  abs_kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(out), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(out), ::testing::ElementsAreArray(output_shape));
+}
+
+// Mixed zero / positive / negative values on a 2x3 float tensor
+TEST(AbsTest, FloatSimple)
+{
+  const std::initializer_list<int32_t> shape{2, 3};
+  const std::initializer_list<float> values{
+    0.0f, -1.0f, 3.0f,  // Row 1
+    1.0f, -1.0f, -2.0f, // Row 2
+  };
+  const std::initializer_list<float> expected{
+    0.0f, 1.0f, 3.0f, // Row 1
+    1.0f, 1.0f, 2.0f, // Row 2
+  };
+
+  Check<float>(shape, shape, values, expected);
+
+  SUCCEED();
+}
+
+// An S32 input paired with a FLOAT32 output must be rejected by configure()
+TEST(AbsTest, Type_Mismatch_NEG)
+{
+  std::unique_ptr<IMemoryManager> mm = std::make_unique<TestMemoryManager>();
+
+  Tensor in = makeInputTensor<loco::DataType::S32>({3}, {1, -3, 2}, mm.get());
+  Tensor out = makeOutputTensor(loco::DataType::FLOAT32);
+
+  Abs abs_kernel(&in, &out);
+  EXPECT_ANY_THROW(abs_kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceProd.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+//
+// axes_data      : axis indices; a negative entry is normalized by adding input_num_dims
+// num_axes       : number of entries in axes_data
+// input_num_dims : rank of the input tensor
+//
+// Throws (via LUCI_INTERPRETER_CHECK) when a normalized axis is outside [0, input_num_dims).
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+  int reduction_count = num_axes;
+  for (int i = 0; i < num_axes; ++i)
+  {
+    int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+    // Checked at run time: assert() would vanish when NDEBUG is defined and let an
+    // out-of-range axis produce a wrong output rank.
+    LUCI_INTERPRETER_CHECK(current >= 0 && current < input_num_dims);
+    for (int j = 0; j < i; j++)
+    {
+      int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+      // This checks for duplicate axis
+      if (current == previous)
+      {
+        --reduction_count;
+        break;
+      }
+    }
+  }
+  return reduction_count;
+}
+
+// Compute the shape of the reduction result.
+//
+// With keep_dims every reduced dimension is kept with extent 1; without it the
+// reduced dimensions are dropped. A rank-0 input yields a rank-0 shape.
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+                            bool keep_dims)
+{
+  const int rank = input_shape.num_dims();
+  if (rank == 0)
+  {
+    return Shape(0);
+  }
+
+  // True when dimension `dim` is named by one of the axes (negative axes count from the back)
+  const auto is_reduced = [&](int dim) {
+    for (int k = 0; k < num_axes; ++k)
+    {
+      const int axis = axes_data[k];
+      if (axis == dim || axis + rank == dim)
+      {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  if (keep_dims)
+  {
+    Shape kept(rank);
+    for (int dim = 0; dim < rank; ++dim)
+    {
+      kept.dim(dim) = is_reduced(dim) ? 1 : input_shape.dim(dim);
+    }
+    return kept;
+  }
+
+  const int reduced_count = getAxisReductionCount(axes_data, num_axes, rank);
+  Shape squeezed(rank - reduced_count);
+  // Next free position in the output; only surviving dimensions advance it
+  int out_dim = 0;
+  for (int dim = 0; dim < rank; ++dim)
+  {
+    if (!is_reduced(dim))
+    {
+      squeezed.dim(out_dim) = input_shape.dim(dim);
+      ++out_dim;
+    }
+  }
+  return squeezed;
+}
+
+// input and axes are registered as the kernel inputs; output, temp_index and
+// resolved_axes (in this order) as its outputs.
+ReduceProd::ReduceProd(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+                       Tensor *resolved_axes, const ReducerParams &params)
+  : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes}, params)
+{
+}
+
+// Validate the tensor types and size the output and the two scratch tensors
+void ReduceProd::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+
+  const Shape &in_shape = input()->shape();
+  const int rank = in_shape.num_dims();
+
+  const auto *axes_values = getTensorData<int32_t>(axes());
+  const int axes_count = axes()->shape().num_elements();
+  LUCI_INTERPRETER_CHECK(axes_count <= 4);
+
+  // We compute shapes of outputs in configure, assuming that outputs have
+  // static shape
+  // TODO Support dynamic shape
+  output()->resize(getOutputShape(in_shape, axes_values, axes_count, _params.keep_dims));
+
+  // Scratch tensors: one index per input dimension, one slot per axis
+  getOutputTensors()[1]->resize(Shape(rank));
+  getOutputTensors()[2]->resize(Shape(axes_count));
+}
+
+// Run the reduction; FLOAT32 is the only element type handled
+void ReduceProd::execute() const
+{
+  // TODO Support quantized kernels
+  if (input()->element_type() != DataType::FLOAT32)
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  evalFloat();
+}
+
+// Multiply the float elements of the input along the requested axes.
+//
+// Throws (via LUCI_INTERPRETER_CHECK) when the axes cannot be resolved or when
+// the reference reduction reports a failure.
+void ReduceProd::evalFloat() const
+{
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+
+  // Scratch tensors sized in configure()
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+
+  int num_resolved_axis = 0;
+  LUCI_INTERPRETER_CHECK(
+    tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes,
+                                       getTensorData<int>(resolved_axes), &num_resolved_axis));
+
+  // 1 is the neutral element of the product
+  float init_value = 1.0;
+  const bool reduced = tflite::reference_ops::ReduceGeneric<float>(
+    getTensorData<float>(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+    getTensorData<float>(output()), getTensorShape(output()).DimsData(),
+    output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims,
+    getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), init_value,
+    [](const float current, const float in) -> float { return current * in; });
+  // ReduceGeneric signals failure through its return value; do not let it pass silently
+  LUCI_INTERPRETER_CHECK(reduced);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H
+#define LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel declaration for the ReduceProd operation, parameterized by ReducerParams.
+class ReduceProd : public KernelWithParams<ReducerParams>
+{
+public:
+  // input         : tensor to reduce
+  // axes          : tensor holding the axes to reduce along
+  // output        : tensor receiving the result
+  // temp_index    : additional tensor handed to the kernel
+  // resolved_axes : additional tensor handed to the kernel
+  // NOTE(review): temp_index and resolved_axes are presumably scratch buffers for the
+  // reduction - confirm against the definition
+  ReduceProd(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+             Tensor *resolved_axes, const ReducerParams &params);
+
+  // Accessors for the two inputs and the first output
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *axes() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  // Kernel interface overrides
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Float implementation
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REDUCE_PROD_H
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceProd.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture which gives every test case its own TestMemoryManager
+class ReduceProdTest : public ::testing::Test
+{
+protected:
+ // Create a fresh memory manager before each test
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ // Used to build the input tensors and to allocate the output/scratch tensors
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Reduce a {4, 3, 2} float tensor over axes 0 and 1 without keeping the reduced dimensions.
+// The axis list also holds -3 twice, which is axis 0 again for a rank-3 input, so the
+// duplicates must not shrink the output rank any further: the expected shape is {2}.
+TEST_F(ReduceProdTest, FloatNotKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 0, -3, -3};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
+ // Scratch tensors start with an empty shape; configure() resizes them
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+ // Memory is allocated only after configure() has resized the tensors
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ // Products of the odd values 1..23 and of the even values 2..24 respectively
+ std::vector<float> ref_output_data{3.162341376e+11, 1.9619905536e+12};
+ std::initializer_list<int32_t> ref_output_shape{2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Reduce a {4, 3, 2} float tensor over axes 0 and 2 while keeping the reduced dimensions
+// with extent 1: the expected shape is {1, 3, 1}.
+TEST_F(ReduceProdTest, FloatKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ // Scratch tensors start with an empty shape; configure() resizes them
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+ // Memory is allocated only after configure() has resized the tensors
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ // One product per index of the surviving middle dimension
+ std::vector<float> ref_output_data{7.74592e+06, 1.197504e+08, 6.6889152e+08};
+ std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Negative test: a FLOAT32 input paired with a U8 output must make configure() throw
+TEST_F(ReduceProdTest, Input_Output_Type_NEG)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ // Deliberately not the element type of the input
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative test: an axes tensor of type S64 instead of S32 must make configure() throw
+TEST_F(ReduceProdTest, Invalid_Axes_Type_NEG)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ // Deliberately 64-bit axes
+ std::vector<int64_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S64>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ ReduceProd kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
namespace kernels
{
-namespace
-{
-TfLiteFusedActivation get_tflite_activation(Activation activation)
-{
- switch (activation)
- {
- case luci::FusedActFunc::RELU:
- return kTfLiteActRelu;
- case luci::FusedActFunc::RELU6:
- return kTfLiteActRelu6;
- case luci::FusedActFunc::RELU_N1_TO_1:
- return kTfLiteActReluN1To1;
- case luci::FusedActFunc::TANH:
- return kTfLiteActTanh;
- case luci::FusedActFunc::SIGN_BIT:
- return kTfLiteActSignBit;
- case luci::FusedActFunc::NONE:
- return kTfLiteActNone;
- default:
- throw std::runtime_error("Unsupported activation type");
- }
-}
-} // namespace
-
SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
TfLiteSVDFParams params_svdf{};
params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
params_svdf.rank = params().svdf_rank;
- params_svdf.activation = get_tflite_activation(params().activation);
+ params_svdf.activation = getTfLiteActivation(params().activation);
auto scratchpad_activation_state = getOutputTensors()[1];
// Note: it is expected that activation_state input variable tensor reset to zero,
TfLiteSVDFParams params_svdf{};
params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
params_svdf.rank = params().svdf_rank;
- params_svdf.activation = get_tflite_activation(params().activation);
+ params_svdf.activation = getTfLiteActivation(params().activation);
auto scratchpad_activation_state = getOutputTensors()[1];
// Note: it is expected that activation_state input variable tensor reset to zero,
#include "kernels/Tanh.h"
#include "kernels/Utils.h"
+#include <limits> // std::numeric_limits
#include <tensorflow/lite/kernels/internal/reference/tanh.h>
#include "luci_interpreter/MemoryManager.h"
#include <type_traits>
+#include <limits> // std::numeric_limits
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
#include <stdexcept>
+#include <limits> // std::numeric_limits
namespace luci_interpreter
{
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/tensor_utils.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace lstm
+{
+namespace
+{
+
+using namespace tflite;
+
+// Updates the LSTM cell state in place for n_batch * n_cell elements.
+//
+// The previous cell state is multiplied element-wise by forget_gate, then the element-wise
+// product of cell_gate and the input gate is accumulated on top. With use_cifg the input_gate
+// argument is not read: forget_gate is overwritten through Sub1Vector and that buffer is used in
+// its place. A clip value greater than zero is forwarded to CwiseClipping on the resulting state.
+void UpdateLstmCellFloat(int n_batch, int n_cell, float *cell_state, const float *input_gate,
+                         float *forget_gate, const float *cell_gate, bool use_cifg, float clip)
+{
+// NOTE tflite source is as is but will fail build with gcc-8 and above
+// TODO remove #pragma
+// NOTE(review): there is no matching push/pop, so this setting stays active for the code that
+// follows in this translation unit - confirm before narrowing its scope.
+#pragma GCC diagnostic ignored "-Wrestrict"
+  const int state_size = n_batch * n_cell;
+
+  // The result buffer aliases the second operand on purpose (in-place update).
+  tensor_utils::VectorVectorCwiseProduct(forget_gate, cell_state, state_size, cell_state);
+
+  const float *effective_input_gate = input_gate;
+  if (use_cifg)
+  {
+    // CIFG: input_gate = 1 - forget_gate. The input gate buffer is not allocated in this mode,
+    // so the forget gate buffer doubles as scratch. The forget gate has already been consumed
+    // above, which is why it is safe to overwrite it here.
+    float *scratch = forget_gate;
+    tensor_utils::Sub1Vector(forget_gate, state_size, scratch);
+    effective_input_gate = scratch;
+  }
+  tensor_utils::VectorVectorCwiseProductAccumulate(cell_gate, effective_input_gate, state_size,
+                                                   cell_state);
+
+  if (clip > 0.0f)
+  {
+    tensor_utils::CwiseClipping(cell_state, state_size, clip);
+  }
+}
+
+// Computes the new output state from the cell state and the output gate.
+//
+// scratch first receives the activation applied to cell_state and is then multiplied
+// element-wise by output_gate (n_batch * n_cell elements).
+//  - Without projection weights, the first n_batch * n_output elements of scratch are copied to
+//    output_state.
+//  - With projection weights, output_state is initialised from projection_bias (or zeros when
+//    the bias is absent), accumulates the projection of scratch, and is passed to CwiseClipping
+//    when proj_clip is greater than zero.
+void CalculateLstmOutputFloat(int n_batch, int n_cell, int n_output, const float *cell_state,
+                              const float *output_gate, TfLiteFusedActivation activation,
+                              const float *projection_weights, const float *projection_bias,
+                              const float proj_clip, float *output_state, float *scratch)
+{
+  const int cell_count = n_batch * n_cell;
+  const int output_count = n_batch * n_output;
+
+  tensor_utils::ApplyActivationToVector(cell_state, cell_count, activation, scratch);
+  tensor_utils::VectorVectorCwiseProduct(output_gate, scratch, cell_count, scratch);
+
+  if (projection_weights == nullptr)
+  {
+    // No projection: the gated activation is the output state.
+    std::copy_n(scratch, output_count, output_state);
+    return;
+  }
+
+  // Seed the accumulator with the bias when there is one, otherwise with zeros.
+  if (projection_bias != nullptr)
+  {
+    tensor_utils::VectorBatchVectorAssign(projection_bias, n_output, n_batch, output_state);
+  }
+  else
+  {
+    std::fill_n(output_state, output_count, 0.0f);
+  }
+
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(projection_weights, n_output, n_cell, scratch,
+                                                    n_batch, output_state);
+
+  if (proj_clip > 0.0f)
+  {
+    tensor_utils::CwiseClipping(output_state, output_count, proj_clip);
+  }
+}
+
+// Computes one LSTM gate into `gate` (n_cell * n_batch elements).
+//
+// Contributions are accumulated in this order: bias (or zeros for a layer-norm LSTM), input
+// weights * input, aux input weights * aux input, recurrent weights * output_state, and the
+// peephole term cell_to_gate_weights .* cell_state. The peephole and layer-norm steps are
+// enabled by a non-null cell_to_gate_weights / layer_norm_coefficients respectively. The
+// input and aux-input products are skipped when the matching is_*_all_zeros flag is set.
+// `activation` is applied to the gate last.
+inline void CalculateLstmGateFloat(const float *input, const float *input_to_gate_weights,
+                                   const float *aux_input, const float *aux_input_to_gate_weights,
+                                   const float *output_state,
+                                   const float *recurrent_to_gate_weights, const float *cell_state,
+                                   const float *cell_to_gate_weights,
+                                   const float *layer_norm_coefficients, const float *gate_bias,
+                                   const int n_batch, const int n_input, const int n_aux_input,
+                                   const int n_output, const int n_cell,
+                                   const TfLiteFusedActivation activation, float *gate,
+                                   const bool is_input_all_zeros, const bool is_aux_input_all_zeros)
+{
+  const int gate_size = n_cell * n_batch;
+  const bool has_layer_norm = (layer_norm_coefficients != nullptr);
+
+  // A regular LSTM starts from the bias. A layer-norm LSTM starts from zero and adds the bias
+  // only after normalization (see below).
+  if (has_layer_norm)
+  {
+    std::fill_n(gate, gate_size, 0.0f);
+  }
+  else
+  {
+    tensor_utils::VectorBatchVectorAssign(gate_bias, n_cell, n_batch, gate);
+  }
+
+  // gate += input_to_gate_weights * input, unless the input is known to be all zeros.
+  if (!is_input_all_zeros)
+  {
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(input_to_gate_weights, n_cell, n_input, input,
+                                                      n_batch, gate);
+  }
+
+  // gate += aux_input_to_gate_weights * aux_input, unless the auxiliary input is missing or all
+  // zeros.
+  if (!is_aux_input_all_zeros)
+  {
+    tensor_utils::MatrixBatchVectorMultiplyAccumulate(aux_input_to_gate_weights, n_cell,
+                                                      n_aux_input, aux_input, n_batch, gate);
+  }
+
+  // gate += recurrent_to_gate_weights * output_state.
+  tensor_utils::MatrixBatchVectorMultiplyAccumulate(recurrent_to_gate_weights, n_cell, n_output,
+                                                    output_state, n_batch, gate);
+
+  // Peephole LSTM only: gate += cell_to_gate_weights .* cell_state.
+  if (cell_to_gate_weights != nullptr)
+  {
+    tensor_utils::VectorBatchVectorCwiseProductAccumulate(cell_to_gate_weights, n_cell, cell_state,
+                                                          n_batch, gate);
+  }
+
+  // Layer-norm LSTM only: normalize in place, scale by the coefficients, then add the bias.
+  if (has_layer_norm)
+  {
+    tensor_utils::MeanStddevNormalization(gate, gate, n_cell, n_batch);
+    tensor_utils::VectorBatchVectorCwiseProduct(layer_norm_coefficients, n_cell, gate, n_batch,
+                                                gate);
+    tensor_utils::VectorBatchVectorAdd(gate_bias, n_cell, n_batch, gate);
+  }
+
+  tensor_utils::ApplyActivationToVector(gate, gate_size, activation, gate);
+}
+
+// Performs a single float LSTM time step.
+//
+// Gates are evaluated in the order input (skipped for CIFG), forget, cell; the cell state is then
+// updated, after which the output gate is evaluated against the updated cell state and the new
+// output state is produced. Finally each batch row of the output state is copied to output_ptr
+// using output_batch_leading_dim as the destination row stride.
+//
+// scratch0..scratch3 hold the input, forget, cell and output gate respectively; scratch2 is
+// reused as the working buffer of the output computation once the cell state has been updated.
+inline void LstmStepFloat(
+  const float *input_ptr, const float *input_to_input_weights_ptr,
+  const float *input_to_forget_weights_ptr, const float *input_to_cell_weights_ptr,
+  const float *input_to_output_weights_ptr, const float *aux_input_ptr,
+  const float *aux_input_to_input_weights_ptr, const float *aux_input_to_forget_weights_ptr,
+  const float *aux_input_to_cell_weights_ptr, const float *aux_input_to_output_weights_ptr,
+  const float *recurrent_to_input_weights_ptr, const float *recurrent_to_forget_weights_ptr,
+  const float *recurrent_to_cell_weights_ptr, const float *recurrent_to_output_weights_ptr,
+  const float *cell_to_input_weights_ptr, const float *cell_to_forget_weights_ptr,
+  const float *cell_to_output_weights_ptr, const float *input_layer_norm_coefficients_ptr,
+  const float *forget_layer_norm_coefficients_ptr, const float *cell_layer_norm_coefficients_ptr,
+  const float *output_layer_norm_coefficients_ptr, const float *input_gate_bias_ptr,
+  const float *forget_gate_bias_ptr, const float *cell_gate_bias_ptr,
+  const float *output_gate_bias_ptr, const float *projection_weights_ptr,
+  const float *projection_bias_ptr, const TfLiteLSTMParams *params, int n_batch, int n_cell,
+  int n_input, int n_aux_input, int n_output, int output_batch_leading_dim, float *output_state_ptr,
+  float *cell_state_ptr, float *scratch0, float *scratch1, float *scratch2, float *scratch3,
+  float *output_ptr)
+{
+  // The input-gate weights have already been checked to be all present or all absent, so
+  // looking at one of them is enough to detect a CIFG LSTM.
+  const bool use_cifg = (input_to_input_weights_ptr == nullptr);
+
+  // Named views of the scratch buffers.
+  float *gate_input = scratch0;
+  float *gate_forget = scratch1;
+  float *gate_cell = scratch2;
+  float *gate_output = scratch3;
+
+  // Detect all-zero inputs up front so the matching matrix products can be skipped.
+  const bool is_input_all_zeros = tensor_utils::IsZeroVector(input_ptr, n_batch * n_input);
+  const bool is_aux_input_all_zeros =
+    (aux_input_ptr == nullptr || tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input));
+
+  // Binds everything the four gate computations have in common; only the per-gate weights,
+  // peephole source, bias, activation and destination vary.
+  const auto compute_gate = [&](const float *input_weights, const float *aux_input_weights,
+                                const float *recurrent_weights, const float *peephole_state,
+                                const float *peephole_weights, const float *norm_coefficients,
+                                const float *bias, const TfLiteFusedActivation activation,
+                                float *gate) {
+    CalculateLstmGateFloat(input_ptr, input_weights, aux_input_ptr, aux_input_weights,
+                           output_state_ptr, recurrent_weights, peephole_state, peephole_weights,
+                           norm_coefficients, bias, n_batch, n_input, n_aux_input, n_output,
+                           n_cell, activation, gate, is_input_all_zeros, is_aux_input_all_zeros);
+  };
+
+  // Input gate (not computed for CIFG).
+  if (!use_cifg)
+  {
+    compute_gate(input_to_input_weights_ptr, aux_input_to_input_weights_ptr,
+                 recurrent_to_input_weights_ptr, cell_state_ptr, cell_to_input_weights_ptr,
+                 input_layer_norm_coefficients_ptr, input_gate_bias_ptr, kTfLiteActSigmoid,
+                 gate_input);
+  }
+
+  // Forget gate.
+  compute_gate(input_to_forget_weights_ptr, aux_input_to_forget_weights_ptr,
+               recurrent_to_forget_weights_ptr, cell_state_ptr, cell_to_forget_weights_ptr,
+               forget_layer_norm_coefficients_ptr, forget_gate_bias_ptr, kTfLiteActSigmoid,
+               gate_forget);
+
+  // Cell update gate: no peephole term, and it uses the configured activation.
+  compute_gate(input_to_cell_weights_ptr, aux_input_to_cell_weights_ptr,
+               recurrent_to_cell_weights_ptr, /*peephole_state=*/nullptr,
+               /*peephole_weights=*/nullptr, cell_layer_norm_coefficients_ptr, cell_gate_bias_ptr,
+               params->activation, gate_cell);
+
+  // Fold the gates into the cell state.
+  UpdateLstmCellFloat(n_batch, n_cell, cell_state_ptr, gate_input, gate_forget, gate_cell,
+                      use_cifg, params->cell_clip);
+
+  // Output gate; its peephole term reads the cell state updated just above.
+  compute_gate(input_to_output_weights_ptr, aux_input_to_output_weights_ptr,
+               recurrent_to_output_weights_ptr, cell_state_ptr, cell_to_output_weights_ptr,
+               output_layer_norm_coefficients_ptr, output_gate_bias_ptr, kTfLiteActSigmoid,
+               gate_output);
+
+  // New output state; scratch2 is free again and serves as the working buffer.
+  CalculateLstmOutputFloat(n_batch, n_cell, n_output, cell_state_ptr, gate_output,
+                           params->activation, projection_weights_ptr, projection_bias_ptr,
+                           params->proj_clip, output_state_ptr, scratch2);
+
+  // Copy the output state to the output row by row: the output rows may not be contiguous
+  // (output_batch_leading_dim != n_output).
+  for (int batch = 0; batch < n_batch; ++batch)
+  {
+    const float *src_row = output_state_ptr + batch * n_output;
+    float *dst_row = output_ptr + batch * output_batch_leading_dim;
+    std::copy_n(src_row, n_output, dst_row);
+  }
+}
+
+} // namespace
+
+// Runs the float LSTM over every time step of `input`.
+//
+// - `input` must be 2-D or 3-D. For a 3-D input `time_major` selects whether dim 0 is time and
+//   dim 1 is batch, or the other way round; a 2-D input is processed as a single time step.
+// - A null `input_to_input_weights` selects the CIFG variant, in which `scratch_buffer` is split
+//   into three gate buffers instead of four (no input gate buffer).
+// - `forward_sequence` selects whether time steps are visited first-to-last or last-to-first.
+// - `output_offset` is added to every output pointer before a step is written.
+// - `output_state` and `cell_state` are read and updated by every step.
+//
+// In time-major mode every step handles the whole batch at once; otherwise each batch row is
+// stepped through time separately with a batch size of one.
+//
+// NOTE The auxiliary input is advanced by its own last-dimension size. It must not share the
+//      step of the main input, because the two tensors may differ in their last dimension.
+void EvalFloat(const Tensor *input,
+
+               const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights,
+               const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights,
+
+               const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights,
+               const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights,
+
+               const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights,
+               const Tensor *cell_to_output_weights,
+
+               const Tensor *input_layer_norm_coefficients,
+               const Tensor *forget_layer_norm_coefficients,
+               const Tensor *cell_layer_norm_coefficients,
+               const Tensor *output_layer_norm_coefficients,
+
+               const Tensor *aux_input, const Tensor *aux_input_to_input_weights,
+               const Tensor *aux_input_to_forget_weights, const Tensor *aux_input_to_cell_weights,
+               const Tensor *aux_input_to_output_weights,
+
+               const Tensor *input_gate_bias, const Tensor *forget_gate_bias,
+               const Tensor *cell_gate_bias, const Tensor *output_gate_bias,
+
+               const Tensor *projection_weights, const Tensor *projection_bias,
+               const TfLiteLSTMParams *params,
+
+               bool forward_sequence, bool time_major, int output_offset,
+
+               Tensor *scratch_buffer, Tensor *output_state, Tensor *cell_state, Tensor *output)
+{
+  const Shape &input_shape = input->shape();
+  assert(input_shape.num_dims() >= 2 && input_shape.num_dims() <= 3);
+  int max_time, n_batch;
+  if (input_shape.num_dims() == 3)
+  {
+    max_time = (time_major) ? input_shape.dim(0) : input_shape.dim(1);
+    n_batch = (time_major) ? input_shape.dim(1) : input_shape.dim(0);
+  }
+  else
+  {
+    max_time = 1;
+    n_batch = input_shape.dim(0);
+  }
+  const int n_input = input_shape.dim(input_shape.num_dims() - 1);
+
+  // Size of the auxiliary input's last dimension, or 0 when there is no auxiliary input.
+  int aux_input_temp = 0;
+  if (aux_input)
+  {
+    const Shape &aux_input_shape = aux_input->shape();
+    aux_input_temp = aux_input_shape.dim(aux_input_shape.num_dims() - 1);
+  }
+  const int aux_input_size = aux_input_temp;
+
+  // n_cell and n_output will be the same size when there is no projection.
+  const Shape &input_to_output_weights_shape = input_to_output_weights->shape();
+  const Shape &recurrent_to_output_weights_shape = recurrent_to_output_weights->shape();
+  const int n_cell = input_to_output_weights_shape.dim(0);
+  const int n_output = recurrent_to_output_weights_shape.dim(1);
+
+  // Since we have already checked that weights are all there or none, we can
+  // check the existence of only one to get the condition.
+  const bool use_cifg = (input_to_input_weights == nullptr);
+
+  // Index the scratch buffers pointers to the global scratch buffer.
+  float *scratch_buffer_ptr = getTensorData<float>(scratch_buffer);
+  float *input_gate_scratch = nullptr;
+  float *cell_gate_scratch = nullptr;
+  float *forget_gate_scratch = nullptr;
+  float *output_gate_scratch = nullptr;
+  if (use_cifg)
+  {
+    cell_gate_scratch = scratch_buffer_ptr;
+    forget_gate_scratch = scratch_buffer_ptr + n_cell * n_batch;
+    output_gate_scratch = scratch_buffer_ptr + 2 * n_cell * n_batch;
+  }
+  else
+  {
+    input_gate_scratch = scratch_buffer_ptr;
+    cell_gate_scratch = scratch_buffer_ptr + n_cell * n_batch;
+    forget_gate_scratch = scratch_buffer_ptr + 2 * n_cell * n_batch;
+    output_gate_scratch = scratch_buffer_ptr + 3 * n_cell * n_batch;
+  }
+
+  const Shape &output_shape = output->shape();
+  const int output_batch_leading_dim = output_shape.dim(output_shape.num_dims() - 1);
+  if (time_major)
+  {
+    // Loop through the sequence.
+    const int input_step = n_batch * n_input;
+    const int aux_input_step = n_batch * aux_input_size;
+    const int output_step = n_batch * output_batch_leading_dim;
+    for (int t = 0; t < max_time; t++)
+    {
+      // If this is the forward_sequence, step forward, otherwise step
+      // backwards.
+      const int t_rel = forward_sequence ? t : max_time - t - 1;
+      const float *input_ptr = getTensorData<float>(input) + t_rel * input_step;
+      const float *aux_input_ptr = nullptr;
+      if (aux_input)
+      {
+        aux_input_ptr = getTensorData<float>(aux_input) + t_rel * aux_input_step;
+      }
+      float *output_ptr = getTensorData<float>(output) + t_rel * output_step + output_offset;
+
+      LstmStepFloat(
+        input_ptr, getTensorData<float>(input_to_input_weights),
+        getTensorData<float>(input_to_forget_weights), getTensorData<float>(input_to_cell_weights),
+        getTensorData<float>(input_to_output_weights), aux_input_ptr,
+        getTensorData<float>(aux_input_to_input_weights),
+        getTensorData<float>(aux_input_to_forget_weights),
+        getTensorData<float>(aux_input_to_cell_weights),
+        getTensorData<float>(aux_input_to_output_weights),
+        getTensorData<float>(recurrent_to_input_weights),
+        getTensorData<float>(recurrent_to_forget_weights),
+        getTensorData<float>(recurrent_to_cell_weights),
+        getTensorData<float>(recurrent_to_output_weights),
+        getTensorData<float>(cell_to_input_weights), getTensorData<float>(cell_to_forget_weights),
+        getTensorData<float>(cell_to_output_weights),
+        getTensorData<float>(input_layer_norm_coefficients),
+        getTensorData<float>(forget_layer_norm_coefficients),
+        getTensorData<float>(cell_layer_norm_coefficients),
+        getTensorData<float>(output_layer_norm_coefficients), getTensorData<float>(input_gate_bias),
+        getTensorData<float>(forget_gate_bias), getTensorData<float>(cell_gate_bias),
+        getTensorData<float>(output_gate_bias), getTensorData<float>(projection_weights),
+        getTensorData<float>(projection_bias), params, n_batch, n_cell, n_input, aux_input_size,
+        n_output, output_batch_leading_dim, getTensorData<float>(output_state),
+        getTensorData<float>(cell_state), input_gate_scratch, forget_gate_scratch,
+        cell_gate_scratch, output_gate_scratch, output_ptr);
+    }
+  }
+  else
+  {
+    for (int b = 0; b < n_batch; b++)
+    {
+      const int input_step = n_input;
+      const int aux_input_step = aux_input_size;
+      const int output_step = output_batch_leading_dim;
+      for (int t = 0; t < max_time; t++)
+      {
+        // If this is the forward_sequence, step forward, otherwise step
+        // backwards.
+        const int t_rel = forward_sequence ? t : max_time - t - 1;
+        const int time_offset = b * max_time + t_rel;
+        const float *input_ptr = getTensorData<float>(input) + time_offset * input_step;
+        const float *aux_input_ptr = nullptr;
+        if (aux_input)
+        {
+          aux_input_ptr = getTensorData<float>(aux_input) + time_offset * aux_input_step;
+        }
+        float *output_ptr =
+          getTensorData<float>(output) + time_offset * output_step + output_offset;
+
+        // Offset the {output,cell}_state pointers to the right batch.
+        float *output_state_ptr = getTensorData<float>(output_state) + b * output_batch_leading_dim;
+        float *cell_state_ptr = getTensorData<float>(cell_state) + b * n_cell;
+        // Offset the scratch pointers to the right batch.
+        float *input_gate_scratch_ptr =
+          input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr;
+        float *forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell;
+        float *cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell;
+        float *output_gate_scratch_ptr = output_gate_scratch + b * n_cell;
+
+        LstmStepFloat(
+          input_ptr, getTensorData<float>(input_to_input_weights),
+          getTensorData<float>(input_to_forget_weights),
+          getTensorData<float>(input_to_cell_weights),
+          getTensorData<float>(input_to_output_weights), aux_input_ptr,
+          getTensorData<float>(aux_input_to_input_weights),
+          getTensorData<float>(aux_input_to_forget_weights),
+          getTensorData<float>(aux_input_to_cell_weights),
+          getTensorData<float>(aux_input_to_output_weights),
+          getTensorData<float>(recurrent_to_input_weights),
+          getTensorData<float>(recurrent_to_forget_weights),
+          getTensorData<float>(recurrent_to_cell_weights),
+          getTensorData<float>(recurrent_to_output_weights),
+          getTensorData<float>(cell_to_input_weights), getTensorData<float>(cell_to_forget_weights),
+          getTensorData<float>(cell_to_output_weights),
+          getTensorData<float>(input_layer_norm_coefficients),
+          getTensorData<float>(forget_layer_norm_coefficients),
+          getTensorData<float>(cell_layer_norm_coefficients),
+          getTensorData<float>(output_layer_norm_coefficients),
+          getTensorData<float>(input_gate_bias), getTensorData<float>(forget_gate_bias),
+          getTensorData<float>(cell_gate_bias), getTensorData<float>(output_gate_bias),
+          getTensorData<float>(projection_weights), getTensorData<float>(projection_bias), params,
+          /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output, output_batch_leading_dim,
+          output_state_ptr, cell_state_ptr, input_gate_scratch_ptr, forget_gate_scratch_ptr,
+          cell_gate_scratch_ptr, output_gate_scratch_ptr, output_ptr);
+      }
+    }
+  }
+}
+
+} // namespace lstm
+} // namespace kernels
+} // namespace luci_interpreter
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Builds the kernel from its 24 input tensors, 4 output tensors and the operator parameters.
+//
+// The inputs are forwarded to the base kernel in exactly the order listed below, and the outputs
+// as {output, scratchpad_1, scratchpad_2, scratchpad_3}. Tensors that are optional for the
+// operator (for example the input-gate weights of a CIFG LSTM, peephole weights, projection
+// tensors and layer-norm coefficients) are passed as nullptr when absent.
+UnidirectionalSequenceLSTM::UnidirectionalSequenceLSTM(
+  const Tensor *input,
+
+  const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights,
+  const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights,
+
+  const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights,
+  const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights,
+
+  const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights,
+  const Tensor *cell_to_output_weights,
+
+  const Tensor *input_gate_bias, const Tensor *forget_gate_bias, const Tensor *cell_gate_bias,
+  const Tensor *output_gate_bias,
+
+  const Tensor *projection_weights, const Tensor *projection_bias,
+
+  const Tensor *output_state, const Tensor *cell_state, const Tensor *input_layer_norm_coefficients,
+  const Tensor *forget_layer_norm_coefficients, const Tensor *cell_layer_norm_coefficients,
+  const Tensor *output_layer_norm_coefficients,
+
+  Tensor *output, Tensor *scratchpad_1, Tensor *scratchpad_2, Tensor *scratchpad_3,
+  const UnidirectionalSequenceLSTMParams &params)
+  : KernelWithParams<UnidirectionalSequenceLSTMParams>(
+      {input,
+       input_to_input_weights,
+       input_to_forget_weights,
+       input_to_cell_weights,
+       input_to_output_weights,
+
+       recurrent_to_input_weights,
+       recurrent_to_forget_weights,
+       recurrent_to_cell_weights,
+       recurrent_to_output_weights,
+
+       cell_to_input_weights,
+       cell_to_forget_weights,
+       cell_to_output_weights,
+
+       input_gate_bias,
+       forget_gate_bias,
+       cell_gate_bias,
+       output_gate_bias,
+
+       projection_weights,
+       projection_bias,
+
+       output_state,
+       cell_state,
+
+       input_layer_norm_coefficients,
+       forget_layer_norm_coefficients,
+       cell_layer_norm_coefficients,
+       output_layer_norm_coefficients},
+      {output, scratchpad_1, scratchpad_2, scratchpad_3}, params)
+{
+  // Do nothing
+}
+
+// Checks that the weight, bias, projection and layer-norm tensors agree with each other.
+//
+// n_input, n_output and n_cell are the sizes every tensor shape is compared against.
+// use_layer_norm enables the checks of the layer-norm coefficients. is_integer selects the
+// expected element types (S16 / S32 when set, otherwise FLOAT32 or the type of
+// input_to_forget_weights).
+//
+// Any violated condition is reported through LUCI_INTERPRETER_CHECK. Tensors whose presence
+// depends on the LSTM variant are checked for presence before they are dereferenced.
+void UnidirectionalSequenceLSTM::check_input_tensor_dimensions(int n_input, int n_output,
+                                                               int n_cell, bool use_layer_norm,
+                                                               bool is_integer)
+{
+  // Making sure clipping parameters have valid values.
+  // == 0 means no clipping
+  //  > 0 means clipping
+  LUCI_INTERPRETER_CHECK(params().cell_clip >= 0);
+  LUCI_INTERPRETER_CHECK(params().proj_clip >= 0);
+
+  if (input_to_input_weights() != nullptr)
+  {
+    const Shape &input_to_input_weights_shape = input_to_input_weights()->shape();
+    LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.num_dims() == 2);
+    LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.dim(0) == n_cell);
+    LUCI_INTERPRETER_CHECK(input_to_input_weights_shape.dim(1) == n_input);
+  }
+
+  const Shape &input_to_forget_weights_shape = input_to_forget_weights()->shape();
+  LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.dim(0) == n_cell);
+  LUCI_INTERPRETER_CHECK(input_to_forget_weights_shape.dim(1) == n_input);
+
+  const Shape &input_to_cell_weights_shape = input_to_cell_weights()->shape();
+  LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.dim(0) == n_cell);
+  LUCI_INTERPRETER_CHECK(input_to_cell_weights_shape.dim(1) == n_input);
+
+  if (recurrent_to_input_weights() != nullptr)
+  {
+    const Shape &recurrent_to_input_weights_shape = recurrent_to_input_weights()->shape();
+    LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.num_dims() == 2);
+    LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.dim(0) == n_cell);
+    LUCI_INTERPRETER_CHECK(recurrent_to_input_weights_shape.dim(1) == n_output);
+  }
+
+  const Shape &recurrent_to_forget_weights_shape = recurrent_to_forget_weights()->shape();
+  LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.dim(0) == n_cell);
+  LUCI_INTERPRETER_CHECK(recurrent_to_forget_weights_shape.dim(1) == n_output);
+
+  const Shape &recurrent_to_cell_weights_shape = recurrent_to_cell_weights()->shape();
+  LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.dim(0) == n_cell);
+  LUCI_INTERPRETER_CHECK(recurrent_to_cell_weights_shape.dim(1) == n_output);
+
+  // We make sure the input-gate's parameters are either both present (regular
+  // LSTM) or not at all (CIFG-LSTM).
+  const bool cifg_weights_all_or_none =
+    ((input_to_input_weights() != nullptr) && (recurrent_to_input_weights() != nullptr)) ||
+    ((input_to_input_weights() == nullptr) && (recurrent_to_input_weights() == nullptr));
+  LUCI_INTERPRETER_CHECK(cifg_weights_all_or_none == true);
+
+  if (cell_to_input_weights() != nullptr)
+  {
+    const Shape &cell_to_input_weights_shape = cell_to_input_weights()->shape();
+    LUCI_INTERPRETER_CHECK(cell_to_input_weights_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(cell_to_input_weights_shape.dim(0) == n_cell);
+    LUCI_INTERPRETER_CHECK(is_integer
+                             ? cell_to_input_weights()->element_type() == loco::DataType::S16
+                             : cell_to_input_weights()->element_type() ==
+                                 input_to_forget_weights()->element_type());
+  }
+
+  if (cell_to_forget_weights() != nullptr)
+  {
+    const Shape &cell_to_forget_weights_shape = cell_to_forget_weights()->shape();
+    LUCI_INTERPRETER_CHECK(cell_to_forget_weights_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(cell_to_forget_weights_shape.dim(0) == n_cell);
+    LUCI_INTERPRETER_CHECK(is_integer
+                             ? cell_to_forget_weights()->element_type() == loco::DataType::S16
+                             : cell_to_forget_weights()->element_type() ==
+                                 input_to_forget_weights()->element_type());
+  }
+
+  if (cell_to_output_weights() != nullptr)
+  {
+    const Shape &cell_to_output_weights_shape = cell_to_output_weights()->shape();
+    LUCI_INTERPRETER_CHECK(cell_to_output_weights_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(cell_to_output_weights_shape.dim(0) == n_cell);
+    LUCI_INTERPRETER_CHECK(is_integer
+                             ? cell_to_output_weights()->element_type() == loco::DataType::S16
+                             : cell_to_output_weights()->element_type() ==
+                                 input_to_forget_weights()->element_type());
+  }
+
+  // Making sure the peephole weights are there all or none.
+  // (With CIFG the input peephole weight is allowed to be missing.)
+  const bool use_cifg = (input_to_input_weights() == nullptr);
+  const bool peephole_weights_all_or_none =
+    ((cell_to_input_weights() != nullptr || use_cifg) && (cell_to_forget_weights() != nullptr) &&
+     (cell_to_output_weights() != nullptr)) ||
+    ((cell_to_input_weights() == nullptr) && (cell_to_forget_weights() == nullptr) &&
+     (cell_to_output_weights() == nullptr));
+  LUCI_INTERPRETER_CHECK(peephole_weights_all_or_none == true);
+
+  // Make sure the input gate bias is present only when not a CIFG-LSTM.
+  if (use_cifg)
+  {
+    LUCI_INTERPRETER_CHECK(input_gate_bias() == nullptr);
+  }
+  else
+  {
+    // Report a missing bias through the check instead of dereferencing a null tensor.
+    LUCI_INTERPRETER_CHECK(input_gate_bias() != nullptr);
+
+    const Shape &input_gate_bias_shape = input_gate_bias()->shape();
+    LUCI_INTERPRETER_CHECK(input_gate_bias_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(input_gate_bias_shape.dim(0) == n_cell);
+    if (is_integer)
+    {
+      LUCI_INTERPRETER_CHECK(input_gate_bias()->element_type() == loco::DataType::S32);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(input_gate_bias()->element_type() == loco::DataType::FLOAT32);
+    }
+  }
+
+  const Shape &forget_gate_bias_shape = forget_gate_bias()->shape();
+  LUCI_INTERPRETER_CHECK(forget_gate_bias_shape.num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(forget_gate_bias_shape.dim(0) == n_cell);
+  if (is_integer)
+  {
+    LUCI_INTERPRETER_CHECK(forget_gate_bias()->element_type() == loco::DataType::S32);
+  }
+  else
+  {
+    LUCI_INTERPRETER_CHECK(forget_gate_bias()->element_type() == loco::DataType::FLOAT32);
+  }
+
+  const Shape &cell_gate_bias_shape = cell_gate_bias()->shape();
+  LUCI_INTERPRETER_CHECK(cell_gate_bias_shape.num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(cell_gate_bias_shape.dim(0) == n_cell);
+  if (is_integer)
+  {
+    LUCI_INTERPRETER_CHECK(cell_gate_bias()->element_type() == loco::DataType::S32);
+  }
+  else
+  {
+    LUCI_INTERPRETER_CHECK(cell_gate_bias()->element_type() == loco::DataType::FLOAT32);
+  }
+
+  const Shape &output_gate_bias_shape = output_gate_bias()->shape();
+  LUCI_INTERPRETER_CHECK(output_gate_bias_shape.num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(output_gate_bias_shape.dim(0) == n_cell);
+  if (is_integer)
+  {
+    LUCI_INTERPRETER_CHECK(output_gate_bias()->element_type() == loco::DataType::S32);
+  }
+  else
+  {
+    LUCI_INTERPRETER_CHECK(output_gate_bias()->element_type() == loco::DataType::FLOAT32);
+  }
+
+  if (projection_weights() != nullptr)
+  {
+    const Shape &projection_weights_shape = projection_weights()->shape();
+    LUCI_INTERPRETER_CHECK(projection_weights_shape.num_dims() == 2);
+    LUCI_INTERPRETER_CHECK(projection_weights_shape.dim(0) == n_output);
+    LUCI_INTERPRETER_CHECK(projection_weights_shape.dim(1) == n_cell);
+  }
+
+  if (projection_bias() != nullptr)
+  {
+    const Shape &projection_bias_shape = projection_bias()->shape();
+    LUCI_INTERPRETER_CHECK(projection_bias_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(projection_bias_shape.dim(0) == n_output);
+    if (is_integer)
+    {
+      LUCI_INTERPRETER_CHECK(projection_bias()->element_type() == loco::DataType::S32);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(projection_bias()->element_type() == loco::DataType::FLOAT32);
+    }
+  }
+
+  // Making sure the projection tensors are consistent:
+  // 1) If projection weight is not present, then projection bias should not be
+  // present.
+  // 2) If projection weight is present, then projection bias is optional.
+  // TODO(ghodrat): make sure this is correct.
+  const bool projection_tensors_consistent =
+    ((projection_weights() != nullptr) || (projection_bias() == nullptr));
+  LUCI_INTERPRETER_CHECK(projection_tensors_consistent == true);
+
+  if (use_layer_norm)
+  {
+    if (use_cifg)
+    {
+      LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients() == nullptr);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients() != nullptr);
+
+      const Shape &input_layer_norm_coefficients_shape = input_layer_norm_coefficients()->shape();
+      LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients_shape.num_dims() == 1);
+      LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients_shape.dim(0) == n_cell);
+      if (is_integer)
+      {
+        LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients()->element_type() ==
+                               loco::DataType::S16);
+      }
+      else
+      {
+        LUCI_INTERPRETER_CHECK(input_layer_norm_coefficients()->element_type() ==
+                               loco::DataType::FLOAT32);
+      }
+    }
+
+    // The remaining coefficient tensors are all required once layer normalization is in use;
+    // check their presence before touching their shapes.
+    LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients() != nullptr);
+    LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients() != nullptr);
+    LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients() != nullptr);
+
+    const Shape &forget_layer_norm_coefficients_shape = forget_layer_norm_coefficients()->shape();
+    LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients_shape.dim(0) == n_cell);
+    if (is_integer)
+    {
+      LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients()->element_type() ==
+                             loco::DataType::S16);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(forget_layer_norm_coefficients()->element_type() ==
+                             loco::DataType::FLOAT32);
+    }
+
+    const Shape &cell_layer_norm_coefficients_shape = cell_layer_norm_coefficients()->shape();
+    LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients_shape.dim(0) == n_cell);
+    if (is_integer)
+    {
+      LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients()->element_type() == loco::DataType::S16);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(cell_layer_norm_coefficients()->element_type() ==
+                             loco::DataType::FLOAT32);
+    }
+
+    const Shape &output_layer_norm_coefficients_shape = output_layer_norm_coefficients()->shape();
+    LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients_shape.num_dims() == 1);
+    LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients_shape.dim(0) == n_cell);
+    if (is_integer)
+    {
+      LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients()->element_type() ==
+                             loco::DataType::S16);
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(output_layer_norm_coefficients()->element_type() ==
+                             loco::DataType::FLOAT32);
+    }
+  }
+}
+
+// Validates the kernel's tensors and sizes its outputs.
+//
+// Only FLOAT32 input is accepted. The input must have more than two dimensions; the batch size
+// is taken from dim 0 or dim 1 depending on params().time_major and the input size from dim 2.
+// n_cell and n_output are taken from input_to_output_weights and recurrent_to_output_weights.
+//
+// Output 0 gets the input shape with its last dimension replaced by n_output. Outputs 1 and 2
+// take the shapes of the output_state and cell_state inputs, and output 3 is the gate scratch
+// buffer of shape {n_batch, n_cell * 3} for CIFG or {n_batch, n_cell * 4} otherwise.
+//
+// Throws std::runtime_error for the (unsupported) hybrid case of U8 weights with float input;
+// every other violation is reported through LUCI_INTERPRETER_CHECK.
+void UnidirectionalSequenceLSTM::configure()
+{
+  LUCI_INTERPRETER_CHECK(getInputTensors().size() == 24);
+  // Outputs [1], [2] and [3] are resized below, so all four outputs must be present.
+  LUCI_INTERPRETER_CHECK(getOutputTensors().size() >= 4);
+
+  // TODO support U8
+  LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32);
+  const bool is_integer = false;
+  const bool use_layer_norm = (forget_layer_norm_coefficients() != nullptr);
+
+  // Inferring batch size, number of outputs and sequence length and
+  // number of cells from the input tensors.
+  const Shape &input_shape = input()->shape();
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() > 1);
+  const bool time_major = params().time_major;
+  const int n_batch = time_major ? input_shape.dim(1) : input_shape.dim(0);
+  // NOTE as dim(2) is accessed, we need to check this is valid
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() > 2);
+  const int n_input = input_shape.dim(2);
+
+  // Validate the rank before any dimension of the weights is read.
+  const Shape &input_to_output_weights_shape = input_to_output_weights()->shape();
+  LUCI_INTERPRETER_CHECK(input_to_output_weights_shape.num_dims() == 2);
+  const int n_cell = input_to_output_weights_shape.dim(0);
+  LUCI_INTERPRETER_CHECK(input_to_output_weights_shape.dim(1) == n_input);
+
+  const Shape &recurrent_to_output_weights_shape = recurrent_to_output_weights()->shape();
+  LUCI_INTERPRETER_CHECK(recurrent_to_output_weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(recurrent_to_output_weights_shape.dim(0) == n_cell);
+
+  const int n_output = recurrent_to_output_weights_shape.dim(1);
+
+  // Check that input tensor dimensions matches with each other.
+  check_input_tensor_dimensions(n_input, n_output, n_cell, use_layer_norm, is_integer);
+
+  // Check the shape of input state tensors.
+  // These tensor may be 1D or 2D. It's fine as long as the total size is
+  // correct.
+  const Shape &output_state_shape = output_state()->shape();
+  const Shape &cell_state_shape = cell_state()->shape();
+  LUCI_INTERPRETER_CHECK(output_state_shape.num_elements() == n_batch * n_output);
+  LUCI_INTERPRETER_CHECK(cell_state_shape.num_elements() == n_batch * n_cell);
+
+  // Resize the output tensors: same as the input except for the last dimension.
+  Shape output_shape = Shape(input_shape.num_dims());
+  for (int i = 0; i < input_shape.num_dims() - 1; i++)
+  {
+    output_shape.dim(i) = input_shape.dim(i);
+  }
+  output_shape.dim(input_shape.num_dims() - 1) = n_output;
+  output()->resize(output_shape);
+
+  // TODO import integer
+
+  // output_state and cell_state are variable tensor; use scratchpad.
+  getOutputTensors()[1]->resize(output_state_shape);
+  getOutputTensors()[2]->resize(cell_state_shape);
+
+  // One gate buffer per computed gate: CIFG has no input gate, hence three instead of four.
+  const bool use_cifg = (input_to_input_weights() == nullptr);
+  if (use_cifg)
+    getOutputTensors()[3]->resize({n_batch, n_cell * 3});
+  else
+    getOutputTensors()[3]->resize({n_batch, n_cell * 4});
+
+  // hybrid not supported
+  if (input_to_output_weights()->element_type() == loco::DataType::U8 &&
+      input()->element_type() == loco::DataType::FLOAT32)
+  {
+    throw std::runtime_error("Hybrid type is not currently supported");
+  }
+  // TODO support hybrid
+  // TODO support U8
+}
+
+// Runs the kernel. Only FLOAT32 input is dispatched (to evalFloat()); any
+// other input element type raises std::runtime_error.
+void UnidirectionalSequenceLSTM::execute() const
+{
+  if (input()->element_type() != loco::DataType::FLOAT32)
+  {
+    throw std::runtime_error("Unsupported type");
+  }
+
+  evalFloat();
+}
+
+// Float evaluation path: zeroes the three scratchpad tensors, fills a
+// TfLiteLSTMParams from the kernel parameters and forwards everything to
+// lstm::EvalFloat().
+void UnidirectionalSequenceLSTM::evalFloat() const
+{
+  // Layer normalization is considered enabled when the forget-gate
+  // coefficients are present; otherwise all four coefficient tensors are
+  // passed on as nullptr.
+  const bool with_layer_norm = (forget_layer_norm_coefficients() != nullptr);
+
+  const Tensor *input_lnorm = nullptr;
+  const Tensor *forget_lnorm = nullptr;
+  const Tensor *cell_lnorm = nullptr;
+  const Tensor *output_lnorm = nullptr;
+  if (with_layer_norm)
+  {
+    input_lnorm = input_layer_norm_coefficients();
+    forget_lnorm = forget_layer_norm_coefficients();
+    cell_lnorm = cell_layer_norm_coefficients();
+    output_lnorm = output_layer_norm_coefficients();
+  }
+
+  Tensor *output_state_buf = getOutputTensors()[1];
+  Tensor *cell_state_buf = getOutputTensors()[2];
+  Tensor *scratch_buf = getOutputTensors()[3];
+
+  // Note: it is expected that the output_state input variable tensor is reset
+  // to zero, and that this variable tensor doesn't have a buffer, so the
+  // scratchpads are cleared here on every call.
+  const auto zero_fill = [](Tensor *tensor) {
+    float *data = getTensorData<float>(tensor);
+    std::fill_n(data, tensor->shape().num_elements(), 0);
+  };
+  zero_fill(output_state_buf);
+  zero_fill(cell_state_buf);
+  zero_fill(scratch_buf);
+
+  TfLiteLSTMParams tflite_params{};
+  tflite_params.activation = getTfLiteActivation(params().activation);
+  tflite_params.cell_clip = params().cell_clip;
+  tflite_params.proj_clip = params().proj_clip;
+  tflite_params.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+
+  const bool time_major = params().time_major;
+
+  lstm::EvalFloat(
+    // input and input-to-gate weights
+    input(), input_to_input_weights(), input_to_forget_weights(), input_to_cell_weights(),
+    input_to_output_weights(),
+    // recurrent-to-gate weights
+    recurrent_to_input_weights(), recurrent_to_forget_weights(), recurrent_to_cell_weights(),
+    recurrent_to_output_weights(),
+    // cell-to-gate weights
+    cell_to_input_weights(), cell_to_forget_weights(), cell_to_output_weights(),
+    // layer normalization coefficients (all nullptr when disabled)
+    input_lnorm, forget_lnorm, cell_lnorm, output_lnorm,
+    /*aux_input=*/nullptr,
+    /*aux_input_to_input_weights=*/nullptr,
+    /*aux_input_to_forget_weights=*/nullptr,
+    /*aux_input_to_cell_weights=*/nullptr,
+    /*aux_input_to_output_weights=*/nullptr,
+    // gate biases
+    input_gate_bias(), forget_gate_bias(), cell_gate_bias(), output_gate_bias(),
+    // projection
+    projection_weights(), projection_bias(), &tflite_params,
+    /*forward_sequence=*/true, time_major,
+    /*output_offset=*/0, scratch_buf, output_state_buf, cell_state_buf, output());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H
+#define LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the UnidirectionalSequenceLSTM operator.
+//
+// The kernel takes 24 input tensors (accessible through the named accessors
+// below, which index into _inputs) and four output tensors: the operator
+// output followed by three scratchpad tensors. Optional inputs may be passed
+// as nullptr.
+class UnidirectionalSequenceLSTM : public KernelWithParams<UnidirectionalSequenceLSTMParams>
+{
+public:
+  UnidirectionalSequenceLSTM(
+    const Tensor *input,
+
+    const Tensor *input_to_input_weights, const Tensor *input_to_forget_weights,
+    const Tensor *input_to_cell_weights, const Tensor *input_to_output_weights,
+
+    const Tensor *recurrent_to_input_weights, const Tensor *recurrent_to_forget_weights,
+    const Tensor *recurrent_to_cell_weights, const Tensor *recurrent_to_output_weights,
+
+    const Tensor *cell_to_input_weights, const Tensor *cell_to_forget_weights,
+    const Tensor *cell_to_output_weights,
+
+    const Tensor *input_gate_bias, const Tensor *forget_gate_bias, const Tensor *cell_gate_bias,
+    const Tensor *output_gate_bias,
+
+    const Tensor *projection_weights, const Tensor *projection_bias,
+
+    const Tensor *output_state, const Tensor *cell_state,
+
+    const Tensor *input_layer_norm_coefficients, const Tensor *forget_layer_norm_coefficients,
+    const Tensor *cell_layer_norm_coefficients, const Tensor *output_layer_norm_coefficients,
+
+    Tensor *output, Tensor *scratchpad_1, Tensor *scratchpad_2, Tensor *scratchpad_3,
+    const UnidirectionalSequenceLSTMParams &params);
+
+  // Input tensor accessors; the index matches the constructor argument order.
+  const Tensor *input() const { return _inputs[0]; }
+
+  const Tensor *input_to_input_weights() const { return _inputs[1]; }
+  const Tensor *input_to_forget_weights() const { return _inputs[2]; }
+  const Tensor *input_to_cell_weights() const { return _inputs[3]; }
+  const Tensor *input_to_output_weights() const { return _inputs[4]; }
+
+  const Tensor *recurrent_to_input_weights() const { return _inputs[5]; }
+  const Tensor *recurrent_to_forget_weights() const { return _inputs[6]; }
+  const Tensor *recurrent_to_cell_weights() const { return _inputs[7]; }
+  const Tensor *recurrent_to_output_weights() const { return _inputs[8]; }
+
+  const Tensor *cell_to_input_weights() const { return _inputs[9]; }
+  const Tensor *cell_to_forget_weights() const { return _inputs[10]; }
+  const Tensor *cell_to_output_weights() const { return _inputs[11]; }
+
+  const Tensor *input_gate_bias() const { return _inputs[12]; }
+  const Tensor *forget_gate_bias() const { return _inputs[13]; }
+  const Tensor *cell_gate_bias() const { return _inputs[14]; }
+  const Tensor *output_gate_bias() const { return _inputs[15]; }
+
+  const Tensor *projection_weights() const { return _inputs[16]; }
+  const Tensor *projection_bias() const { return _inputs[17]; }
+
+  const Tensor *output_state() const { return _inputs[18]; }
+  const Tensor *cell_state() const { return _inputs[19]; }
+
+  const Tensor *input_layer_norm_coefficients() const { return _inputs[20]; }
+  const Tensor *forget_layer_norm_coefficients() const { return _inputs[21]; }
+  const Tensor *cell_layer_norm_coefficients() const { return _inputs[22]; }
+  const Tensor *output_layer_norm_coefficients() const { return _inputs[23]; }
+
+  // Operator output (first output tensor).
+  Tensor *output() const { return _outputs[0]; }
+
+  // Validates tensors and resizes the outputs.
+  void configure() override;
+  // Evaluates the operator for the configured tensors.
+  void execute() const override;
+
+private:
+  // Float evaluation path used by execute().
+  void evalFloat() const;
+
+  // Shape/type checks for the weight, bias and layer-norm tensors; called from configure().
+  void check_input_tensor_dimensions(int n_input, int n_output, int n_cell, bool use_layer_norm,
+                                     bool is_integer);
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UNIDIRECTIONALSEQUENCELSTM_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture that gives every test case its own TestMemoryManager.
+class UnidirectionalSequenceLSTMTest : public ::testing::Test
+{
+protected:
+  // Memory manager used by the tests to allocate tensor buffers.
+  std::unique_ptr<IMemoryManager> _memory_manager;
+
+  // Creates a fresh memory manager before each test runs.
+  void SetUp() override
+  {
+    _memory_manager = std::make_unique<TestMemoryManager>();
+  }
+};
+
+// NOTE from NoCifgNoPeepholeNoProjectionNoClippingUnidirectionalLstmTest
+// Float LSTM with time-major input ({sequence_length, n_batch, n_input}), all
+// four gates present and no peephole, projection or layer-norm tensors.
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+
+  std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284,  0.11810488, 0.2013163,
+                                              -0.20583314, 0.44344562, 0.22077113, -0.29909778};
+
+  std::vector<float> input_to_forget_weights = {0.09701663,  0.20334584, -0.50592935, -0.31343272,
+                                                -0.40032279, 0.44781327, 0.01387155,  -0.35593212};
+
+  std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138,
+                                                0.44272184,  0.03897077,  -0.1556896, 0.19487578};
+
+  std::vector<float> input_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> forget_gate_bias = {1., 1., 1., 1.};
+  std::vector<float> cell_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> output_gate_bias = {0., 0., 0., 0.};
+
+  std::vector<float> recurrent_to_input_weights = {
+    -0.0063535,  -0.2042388,  0.31454784,  -0.35746509, 0.28902304, 0.08183324,
+    -0.16555229, 0.02286911,  -0.13566875, 0.03034258,  0.48091322, -0.12528998,
+    0.24077177,  -0.51332325, -0.33502164, 0.10629296};
+
+  std::vector<float> recurrent_to_forget_weights = {
+    -0.48684245, -0.06655136, 0.42224967,  0.2112639,   0.27654213, 0.20864892,
+    -0.07646349, 0.45877004,  0.00141793,  -0.14609534, 0.36447752, 0.09196436,
+    0.28053468,  0.01560611,  -0.20127171, -0.01140004};
+
+  std::vector<float> recurrent_to_cell_weights = {
+    -0.3407414,  0.24443203,  -0.2078532,  0.26320225,  0.05695659, -0.00123841,
+    -0.4744786,  -0.35869038, -0.06418842, -0.13502428, -0.501764,  0.22830659,
+    -0.46367589, 0.26016325,  -0.03894562, -0.16368064};
+
+  std::vector<float> recurrent_to_output_weights = {
+    0.43385774,  -0.17194885, 0.2718237,  0.09215671,  0.24107647, -0.39835793,
+    0.18212086,  0.01301402,  0.48572797, -0.50656658, 0.20047462, -0.20607421,
+    -0.51818722, -0.15390486, 0.0468148,  0.39922136};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{sequence_length, n_batch, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  // Dedicated scratchpads; they are resized by configure().
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  _memory_manager->allocate_memory(scratchpad_2);
+  _memory_manager->allocate_memory(scratchpad_3);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{-0.02973187, 0.1229473,  0.20885126, -0.15358765,
+                                     -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+                                     -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+
+  // Shape dimensions are integers, so compare them as int32_t rather than float.
+  std::vector<int32_t> ref_output_shape{sequence_length, n_batch, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Same weights and input data as FloatTest, but with batch-major input
+// ({n_batch, sequence_length, n_input}, time_major = false). The state tensors
+// are reused as the first two scratchpads.
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_batch)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+
+  std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284,  0.11810488, 0.2013163,
+                                              -0.20583314, 0.44344562, 0.22077113, -0.29909778};
+
+  std::vector<float> input_to_forget_weights = {0.09701663,  0.20334584, -0.50592935, -0.31343272,
+                                                -0.40032279, 0.44781327, 0.01387155,  -0.35593212};
+
+  std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138,
+                                                0.44272184,  0.03897077,  -0.1556896, 0.19487578};
+
+  std::vector<float> input_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> forget_gate_bias = {1., 1., 1., 1.};
+  std::vector<float> cell_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> output_gate_bias = {0., 0., 0., 0.};
+
+  std::vector<float> recurrent_to_input_weights = {
+    -0.0063535,  -0.2042388,  0.31454784,  -0.35746509, 0.28902304, 0.08183324,
+    -0.16555229, 0.02286911,  -0.13566875, 0.03034258,  0.48091322, -0.12528998,
+    0.24077177,  -0.51332325, -0.33502164, 0.10629296};
+
+  std::vector<float> recurrent_to_forget_weights = {
+    -0.48684245, -0.06655136, 0.42224967,  0.2112639,   0.27654213, 0.20864892,
+    -0.07646349, 0.45877004,  0.00141793,  -0.14609534, 0.36447752, 0.09196436,
+    0.28053468,  0.01560611,  -0.20127171, -0.01140004};
+
+  std::vector<float> recurrent_to_cell_weights = {
+    -0.3407414,  0.24443203,  -0.2078532,  0.26320225,  0.05695659, -0.00123841,
+    -0.4744786,  -0.35869038, -0.06418842, -0.13502428, -0.501764,  0.22830659,
+    -0.46367589, 0.26016325,  -0.03894562, -0.16368064};
+
+  std::vector<float> recurrent_to_output_weights = {
+    0.43385774,  -0.17194885, 0.2718237,  0.09215671,  0.24107647, -0.39835793,
+    0.18212086,  0.01301402,  0.48572797, -0.50656658, 0.20047462, -0.20607421,
+    -0.51818722, -0.15390486, 0.0468148,  0.39922136};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{n_batch, sequence_length, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  // Only the gate scratch buffer gets its own tensor in this test.
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = false;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, &cell_state_tensor,
+    &scratchpad_1, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{-0.02973187, 0.1229473,  0.20885126, -0.15358765,
+                                     -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+                                     -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+
+  // Shape dimensions are integers, so compare them as int32_t rather than float.
+  std::vector<int32_t> ref_output_shape{n_batch, sequence_length, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Minimal float case: every dimension is 1, batch-major input and a cell clip
+// of 10.0.
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_simple)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 1;
+  const int32_t n_cell = 1;
+  const int32_t n_output = 1;
+  const int32_t sequence_length = 1;
+
+  std::vector<float> input_to_input_weights = {0.329067};
+  std::vector<float> input_to_forget_weights = {0.308059};
+  std::vector<float> input_to_cell_weights = {0.152916};
+  std::vector<float> input_to_output_weights = {-0.476033};
+
+  std::vector<float> input_gate_bias = {0.};
+  std::vector<float> forget_gate_bias = {1.};
+  std::vector<float> cell_gate_bias = {0.};
+  std::vector<float> output_gate_bias = {0.};
+
+  std::vector<float> recurrent_to_input_weights = {0.207806};
+  std::vector<float> recurrent_to_forget_weights = {0.028718};
+  std::vector<float> recurrent_to_cell_weights = {-0.182756};
+  std::vector<float> recurrent_to_output_weights = {-0.960517};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{0.03653763};
+  Shape input_shape{n_batch, sequence_length, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  // Only the gate scratch buffer gets its own tensor; the state tensors are
+  // reused as the first two scratchpads.
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 10.0;
+  params.proj_clip = 0.0;
+  params.time_major = false;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, &cell_state_tensor,
+    &scratchpad_1, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0.00139296};
+  // Shape dimensions are integers, so compare them as int32_t rather than float.
+  std::vector<int32_t> ref_output_shape{n_batch, sequence_length, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Negative test: configure() must throw when the input tensor is S8.
+TEST_F(UnidirectionalSequenceLSTMTest, Unsupported_Type_Configure_NEG)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<int8_t> input_data{2, 3, 3, 4, 1, 1}; // int8 is not supported as of now
+  Shape input_shape{sequence_length, n_batch, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>(input_shape, input_data, _memory_manager.get());
+
+  // A single weight tensor stands in for every mandatory weight/bias/state
+  // input.
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+    nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative test: configure() must throw when the input tensor is only 2-D.
+TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_NEG)
+{
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{sequence_length, n_input}; // this is wrong
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  // A single weight tensor stands in for every mandatory weight/bias/state
+  // input.
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+    nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative test: the input tensor is 3-D FLOAT32, but the same {n_cell, n_input}
+// tensor is passed for every weight, bias and state input, so configure() is
+// expected to throw.
+TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_2_NEG)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{sequence_length, n_batch, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  // NOTE provide wrong shaped inputs
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+    &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+    nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
namespace kernels
{
+// Translates the interpreter's fused-activation value into the matching TfLite constant.
+// Any value without an entry in the switch below raises std::runtime_error.
+TfLiteFusedActivation getTfLiteActivation(Activation activation)
+{
+  switch (activation)
+  {
+    case Activation::NONE:
+      return kTfLiteActNone;
+    case Activation::RELU:
+      return kTfLiteActRelu;
+    case Activation::RELU_N1_TO_1:
+      return kTfLiteActReluN1To1;
+    case Activation::RELU6:
+      return kTfLiteActRelu6;
+    case Activation::TANH:
+      return kTfLiteActTanh;
+    case Activation::SIGN_BIT:
+      return kTfLiteActSignBit;
+    default:
+      // No TfLite counterpart: fall out of the switch and report below
+      break;
+  }
+
+  throw std::runtime_error("Unsupported activation type");
+}
+
template <typename T>
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
{
#include "core/KernelParams.h"
#include "luci_interpreter/core/Tensor.h"
+#include <tensorflow/lite/kernels/internal/tensor_utils.h>
#include <tensorflow/lite/kernels/internal/types.h>
#include <cassert>
return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
}
+TfLiteFusedActivation getTfLiteActivation(Activation activation);
+
template <typename T>
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
- throw std::runtime_error("Unsupported Custom operator. " + node->name());
+ {
+ const auto *cnode = loco::must_cast<const luci::CircleCustom *>(node);
+ throw std::runtime_error("Unsupported Custom operator. " + cnode->custom_code() + " in " +
+ node->name());
+ }
if (!isTensorProducingNode(node))
continue;
#include "loader/KernelBuilder.h"
#include "loader/nodes/Builders.h"
+#include <luci/IR/CircleOpcode.h>
+#include <luci/IR/CircleNodeDecl.h>
+
#include <stdexcept>
+namespace
+{
+
+// TODO Extract this helper function
+/**
+ * @brief Return a printable operator name for the given Circle opcode
+ *
+ * The name table is generated from CircleNodes.lst and holds the node class names in the
+ * order the list declares them. The opcode value is used as the index into that table
+ * (NOTE(review): this relies on luci::CircleOpcode being generated from the same list in
+ * the same order - confirm against CircleOpcode.h).
+ *
+ * The leading "Circle" of the class name is dropped.
+ * Ex: Return "Conv2D" for "CircleConv2D" node
+ *
+ * This helper is used while composing an error message, so unexpected input is reported
+ * in the returned text instead of asserting or reading outside the table:
+ * - opcode outside the table     -> "Unknown(<numeric value>)"
+ * - class name without "Circle"  -> the class name unchanged
+ */
+std::string toString(luci::CircleOpcode opcode)
+{
+  static const char *const names[] = {
+#define CIRCLE_NODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#define CIRCLE_VNODE(OPCODE, CIRCLE_CLASS) #CIRCLE_CLASS,
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+#undef CIRCLE_VNODE
+  };
+
+  const int count = static_cast<int>(sizeof(names) / sizeof(names[0]));
+  const int index = static_cast<int>(opcode);
+
+  // Guard the table lookup: a corrupted or newer opcode must not index out of bounds
+  if (index < 0 || index >= count)
+    return "Unknown(" + std::to_string(index) + ")";
+
+  const std::string class_name = names[index];
+  const std::string prefix = "Circle";
+
+  // compare() clamps the length, so a name shorter than the prefix is handled safely
+  if (class_name.compare(0, prefix.size(), prefix) != 0)
+    return class_name;
+
+  return class_name.substr(prefix.size());
+}
+
+} // namespace
+
namespace luci_interpreter
{
return specific_builder(node, *this);
std::string msg = "Unsupported operator: ";
- msg += std::to_string(static_cast<uint32_t>(node->opcode())) + " " + std::string(node->name());
+ msg += toString(node->opcode()) + " in " + std::string(node->name());
throw std::invalid_argument(msg.c_str());
}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Abs.h"
+
+namespace luci_interpreter
+{
+
+// Creates the runtime Abs kernel for a CircleAbs node.
+// The node's single operand (x) is looked up as the kernel input and the node itself as
+// the kernel output.
+std::unique_ptr<Kernel> build_kernel_CircleAbs(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const auto *abs_node = loco::must_cast<const luci::CircleAbs *>(circle_node);
+  assert(abs_node->arity() == 1);
+
+  const Tensor *x_tensor = helper.getInputTensor(abs_node->x());
+  Tensor *result_tensor = helper.getOutputTensor(abs_node);
+
+  return std::make_unique<kernels::Abs>(x_tensor, result_tensor);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReduceProd.h"
+
+namespace luci_interpreter
+{
+
+// Creates the runtime ReduceProd kernel for a CircleReduceProd node.
+// Besides the node's two operands (input and reduction indices) the kernel receives two
+// auxiliary S32 tensors that are created here and registered with the runtime graph.
+std::unique_ptr<Kernel> build_kernel_CircleReduceProd(const luci::CircleNode *circle_node,
+                                                      KernelBuilderHelper &helper)
+{
+  const auto *reduce_node = loco::must_cast<const luci::CircleReduceProd *>(circle_node);
+  assert(reduce_node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(reduce_node->input());
+  const Tensor *axes = helper.getInputTensor(reduce_node->reduction_indices());
+  Tensor *output = helper.getOutputTensor(reduce_node);
+
+  // Makes one unnamed, non-observable S32 tensor with an empty shape and no data buffer,
+  // moves it into the runtime graph and returns the pointer the graph gives back
+  const auto add_aux_tensor = [&helper, reduce_node]() -> Tensor * {
+    auto aux = std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+    aux->set_observable(false);
+    aux->set_data_buffer(nullptr);
+    return helper.getRuntimeGraph(reduce_node->graph())->addTensor(std::move(aux));
+  };
+
+  // Registration order is kept: temp_index first, then resolved_axes
+  Tensor *temp_index = add_aux_tensor();
+  Tensor *resolved_axes = add_aux_tensor();
+
+  ReducerParams reducer_params{};
+  reducer_params.keep_dims = reduce_node->keep_dims();
+
+  return std::make_unique<kernels::ReduceProd>(input, axes, output, temp_index, resolved_axes,
+                                               reducer_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+
+namespace luci_interpreter
+{
+
+// Creates the runtime UnidirectionalSequenceLSTM kernel for the matching Circle node.
+// All 24 operands are looked up through the helper (optional ones via
+// getOptionalInputTensor), three scratchpad tensors are registered with the runtime graph,
+// and the node attributes are copied into the kernel parameters.
+std::unique_ptr<Kernel>
+build_kernel_CircleUnidirectionalSequenceLSTM(const luci::CircleNode *circle_node,
+                                              KernelBuilderHelper &helper)
+{
+  const auto *lstm_node =
+    loco::must_cast<const luci::CircleUnidirectionalSequenceLSTM *>(circle_node);
+  assert(lstm_node->arity() == 24);
+
+  const Tensor *input = helper.getInputTensor(lstm_node->input());
+
+  // input-to-gate weights
+  const Tensor *input_to_input_weights =
+    helper.getOptionalInputTensor(lstm_node->input_to_input_weights());
+  const Tensor *input_to_cell_weights = helper.getInputTensor(lstm_node->input_to_cell_weights());
+  const Tensor *input_to_forget_weights =
+    helper.getInputTensor(lstm_node->input_to_forget_weights());
+  const Tensor *input_to_output_weights =
+    helper.getInputTensor(lstm_node->input_to_output_weights());
+
+  // recurrent-to-gate weights
+  const Tensor *recurrent_to_input_weights =
+    helper.getOptionalInputTensor(lstm_node->recurrent_to_input_weights());
+  const Tensor *recurrent_to_cell_weights =
+    helper.getInputTensor(lstm_node->recurrent_to_cell_weights());
+  const Tensor *recurrent_to_forget_weights =
+    helper.getInputTensor(lstm_node->recurrent_to_forget_weights());
+  const Tensor *recurrent_to_output_weights =
+    helper.getInputTensor(lstm_node->recurrent_to_output_weights());
+
+  // cell-to-gate weights
+  const Tensor *cell_to_input_weights =
+    helper.getOptionalInputTensor(lstm_node->cell_to_input_weights());
+  const Tensor *cell_to_forget_weights =
+    helper.getOptionalInputTensor(lstm_node->cell_to_forget_weights());
+  const Tensor *cell_to_output_weights =
+    helper.getOptionalInputTensor(lstm_node->cell_to_output_weights());
+
+  // gate biases
+  const Tensor *input_gate_bias = helper.getOptionalInputTensor(lstm_node->input_gate_bias());
+  const Tensor *forget_gate_bias = helper.getInputTensor(lstm_node->forget_gate_bias());
+  const Tensor *cell_gate_bias = helper.getInputTensor(lstm_node->cell_gate_bias());
+  const Tensor *output_gate_bias = helper.getInputTensor(lstm_node->output_gate_bias());
+
+  // projection
+  const Tensor *projection_weights =
+    helper.getOptionalInputTensor(lstm_node->projection_weights());
+  const Tensor *projection_bias = helper.getOptionalInputTensor(lstm_node->projection_bias());
+
+  // states
+  const Tensor *output_state = helper.getInputTensor(lstm_node->output_state());
+  const Tensor *cell_state = helper.getInputTensor(lstm_node->cell_state());
+
+  // layer normalization coefficients
+  const Tensor *input_layer_norm_coefficients =
+    helper.getOptionalInputTensor(lstm_node->input_layer_norm_coefficients());
+  const Tensor *forget_layer_norm_coefficients =
+    helper.getOptionalInputTensor(lstm_node->forget_layer_norm_coefficients());
+  const Tensor *cell_layer_norm_coefficients =
+    helper.getOptionalInputTensor(lstm_node->cell_layer_norm_coefficients());
+  const Tensor *output_layer_norm_coefficients =
+    helper.getOptionalInputTensor(lstm_node->output_layer_norm_coefficients());
+
+  Tensor *output = helper.getOutputTensor(lstm_node);
+
+  // scratch pad tensors
+  // NOTE more scratch pads are needed if hybrid or integer kernels get supported
+  //
+  // Each scratch pad is an unnamed, non-observable tensor with an empty shape and no data
+  // buffer; it is moved into the runtime graph and the pointer the graph returns is kept.
+  const auto add_scratchpad = [&helper, lstm_node](DataType element_type) -> Tensor * {
+    auto scratchpad =
+      std::make_unique<Tensor>(element_type, Shape({}), AffineQuantization{}, "");
+    scratchpad->set_observable(false);
+    scratchpad->set_data_buffer(nullptr);
+    return helper.getRuntimeGraph(lstm_node->graph())->addTensor(std::move(scratchpad));
+  };
+
+  // Element types follow output_state, cell_state and input, in this registration order
+  Tensor *scratch_output_state = add_scratchpad(output_state->element_type());
+  Tensor *scratch_cell_state = add_scratchpad(cell_state->element_type());
+  Tensor *scratch_input = add_scratchpad(input->element_type());
+
+  UnidirectionalSequenceLSTMParams lstm_params{};
+  lstm_params.activation = lstm_node->fusedActivationFunction();
+  lstm_params.cell_clip = lstm_node->cell_clip();
+  lstm_params.proj_clip = lstm_node->proj_clip();
+  lstm_params.time_major = lstm_node->time_major();
+  lstm_params.asymmetric_quantize_inputs = lstm_node->asymmetric_quantize_inputs();
+
+  // Argument order below is input / forget / cell / output for each weight and bias group
+  return std::make_unique<kernels::UnidirectionalSequenceLSTM>(
+    input,
+    input_to_input_weights, input_to_forget_weights, input_to_cell_weights,
+    input_to_output_weights,
+    recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
+    recurrent_to_output_weights,
+    cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
+    input_gate_bias, forget_gate_bias, cell_gate_bias, output_gate_bias,
+    projection_weights, projection_bias,
+    output_state, cell_state,
+    input_layer_norm_coefficients, forget_layer_norm_coefficients,
+    cell_layer_norm_coefficients, output_layer_norm_coefficients,
+    output, scratch_output_state, scratch_cell_state, scratch_input, lstm_params);
+}
+
+} // namespace luci_interpreter
+++ /dev/null
-set(ARM_C_COMPILER "arm-none-eabi-gcc")
-set(ARM_ASM_COMPILER "arm-none-eabi-gcc")
-set(ARM_CXX_COMPILER "arm-none-eabi-g++")
-set(ARM_OBJCOPY "arm-none-eabi-objcopy")
-
-find_program(ARM_C_COMPILER_PATH ${ARM_C_COMPILER})
-
-if(NOT ARM_C_COMPILER_PATH)
- message(STATUS "Build luci-micro: FALSE(ARM compiler is NOT FOUND)")
- return()
-endif()
-
-set(CMAKE_ARM_OPTIONS
- -DLUCI_INTERPRETER_STATIC=ON
- -DLUCI_STATIC=ON
- -DBUILD_CMSIS_NN_FUNCTIONS=ON
- -DTARGET_CPU=cortex-m7
- "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake"
- "-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu"
- "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}"
- "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}"
- -DC_COMPILER=${ARM_C_COMPILER}
- -DCXX_COMPILER=${ARM_CXX_COMPILER}
- -DASM_COMPILER=${ARM_ASM_COMPILER}
- -DOBJCOPY=${ARM_OBJCOPY}
- -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
- -DENABLE_TEST=OFF
- -DBUILD_GTEST=OFF
- "-DNNAS_ROOT=${NNAS_PROJECT_SOURCE_DIR}"
- -DENABLE_STRICT_BUILD=OFF
-)
-
-set(MICRO_ARM_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/standalone_arm")
-file(MAKE_DIRECTORY "${MICRO_ARM_BUILD_DIR}")
-
-set(MICRO_ARM_BUILD_DEPENDENCY "${MICRO_ARM_BUILD_DIR}/CMakeCache.txt")
-
-add_custom_command(
- OUTPUT "${MICRO_ARM_BUILD_DEPENDENCY}"
- COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/standalone" ${CMAKE_ARM_OPTIONS}
- WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
- DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/standalone/CMakeLists.txt"
- VERBATIM
-)
-
-add_custom_target(luci_interpreter_micro_arm_cmake DEPENDS "${MICRO_ARM_BUILD_DEPENDENCY}")
-
-set(MICRO_ARM_BINARY "${MICRO_ARM_BUILD_DIR}/compiler/luci-interpreter/src/libluci_interpreter.a")
-
-add_custom_command(
- OUTPUT "${MICRO_ARM_BINARY}"
- COMMAND "${CMAKE_MAKE_PROGRAM}" luci_interpreter -j ${CPU_COUNT}
- WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
- DEPENDS luci_interpreter_micro_arm_cmake
- VERBATIM
-)
-
-add_custom_target(luci_interpreter_micro_arm DEPENDS "${MICRO_ARM_BINARY}")
+++ /dev/null
-set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
-set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
-if (NOT LUCI_INTERPRETER_PAL_DIR)
- set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/linux")
-endif()
-
-set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst)
-
-if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX)
- set(LUCI_INTERPRETER_SUFFIX "")
-else()
- set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX})
-endif()
-
-add_subdirectory(src)
+++ /dev/null
-/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/MemoryManager.h"
-
-#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
-#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
-
-namespace luci_interpreter
-{
-
-class BuddyMemoryManager : public IMemoryManager
-{
-public:
- BuddyMemoryManager(uint8_t *memory_start, int32_t memSize);
-
- void allocate_memory(luci_interpreter::Tensor &tensor) final;
- void release_memory(luci_interpreter::Tensor &tensor) final;
-
-private:
- struct Block
- {
- Block *next_free;
- bool is_free;
- uint32_t size;
- // debug field
- Block *self;
- };
-
- Block *_start_block;
- int32_t _num_blocks;
- uint32_t _size;
- Block *_free_blocks[32]{};
-
- static int32_t lowerLog2(uint32_t val)
- {
- int32_t i = 0;
- while (val >>= 1)
- i++;
-
- return i;
- }
-
- void addToBlocks(Block *block, int32_t l)
- {
- if (!block)
- return;
-
- block->next_free = _free_blocks[l];
- _free_blocks[l] = block;
- }
-
- void removeFromBlocks(const Block *block, int32_t l)
- {
- if (!block)
- return;
-
- Block *tmp = _free_blocks[l];
-
- if (block == tmp)
- {
- _free_blocks[l] = block->next_free;
- return;
- }
-
- while (tmp)
- {
- if (tmp->next_free == block)
- {
- tmp->next_free = block->next_free;
- return;
- }
-
- tmp = tmp->next_free;
- }
- }
-
- void divideBlock(Block *block, int32_t l)
- {
- int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block);
-
- removeFromBlocks(block, l);
-
- // there is no need to add to the free_blocks list here
- block->is_free = true;
- block->size = size;
- block->self = block;
-
- Block *buddy;
- buddy = (Block *)((uint8_t *)block + sizeof(Block) + size);
- buddy->is_free = true;
- buddy->size = size;
- buddy->self = buddy;
-
- addToBlocks(buddy, l - 1);
- }
-
- Block *mergeBlock(Block *block)
- {
- Block *buddy;
-
- const int32_t l = lowerLog2(block->size + sizeof(Block));
-
- const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block);
- buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block);
-
- if (!buddy->is_free || buddy->size != block->size)
- return nullptr;
-
- if (block > buddy)
- {
- Block *x = block;
- block = buddy;
- buddy = x;
- }
-
- removeFromBlocks(block, l);
- removeFromBlocks(buddy, l);
-
- block->size = block->size * 2 + sizeof(Block);
- block->is_free = true;
- block->self = block;
-
- addToBlocks(block, l + 1);
-
- return block;
- }
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
-#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
-
-#include <luci/Import/GraphBuilderRegistry.h>
-
-namespace luci_interpreter
-{
-
-/**
- * @brief Creates and returns GraphBuilderSource, which allows to not copy constant buffers from
- * model's file.
- *
- * @warning Use this source only in case when model's buffer alive longer than Interpreter.
- */
-std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying();
-
-} // namespace luci_interpreter
-
-#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_INTERPRETER_H
-#define LUCI_INTERPRETER_INTERPRETER_H
-
-#include "luci_interpreter/core/Tensor.h"
-
-#include <luci/IR/Nodes/CircleInput.h>
-#include <luci/IR/Nodes/CircleOutput.h>
-
-#include "luci_interpreter/MemoryManager.h"
-#include <luci/IR/Module.h>
-
-#include <memory>
-#include <vector>
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-class ExecutionObserver
-{
-public:
- virtual ~ExecutionObserver();
-
- // Called when the value of a tensor has been updated during execution.
- virtual void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor);
-
- // Called before / after executing an operator.
- // Note that these methods are not called for auxiliary operators (CircleInput, CircleOutput,
- // CircleConst and Circle*Out).
- virtual void preOperatorExecute(const luci::CircleNode *node);
- virtual void postOperatorExecute(const luci::CircleNode *node);
-};
-
-class Interpreter
-{
-public:
- explicit Interpreter(const luci::Module *module);
-
- explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager);
-
- ~Interpreter();
-
- void writeInputTensor(const luci::CircleInput *input_node, const void *data, size_t data_size);
-
- void readOutputTensor(const luci::CircleOutput *output_node, void *data, size_t data_size);
-
- void interpret();
-
- void attachObserver(ExecutionObserver *observer);
-
- const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; }
-
-private:
- // _default_memory_manager should be before _runtime_module due to
- // the order of deletion in the destructor
- std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr;
- std::unique_ptr<class RuntimeModule> _runtime_module;
-
- // Observer functionality support.
- std::unique_ptr<struct RuntimeToIR> _runtime_to_ir;
- std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
- std::unique_ptr<class EventNotifier> _event_notifier;
- std::vector<ExecutionObserver *> _observers;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_INTERPRETER_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_MEMORY_MANAGER_H
-#define LUCI_INTERPRETER_MEMORY_MANAGER_H
-
-#include "luci_interpreter/core/DataType.h"
-#include "luci_interpreter/core/Tensor.h"
-
-namespace luci_interpreter
-{
-
-class IMemoryManager
-{
-public:
- virtual void allocate_memory(luci_interpreter::Tensor &tensor) = 0;
- virtual void release_memory(luci_interpreter::Tensor &tensor) = 0;
-
- virtual ~IMemoryManager() = default;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_MEMORY_MANAGER_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
-#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
-
-#include "luci_interpreter/MemoryManager.h"
-
-namespace luci_interpreter
-{
-
-class SimpleMemoryManager : public IMemoryManager
-{
-public:
- void allocate_memory(luci_interpreter::Tensor &tensor) final;
- void release_memory(luci_interpreter::Tensor &tensor) final;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
-#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
-
-#include "luci_interpreter/MemoryManager.h"
-
-namespace luci_interpreter
-{
-
-// Used for allocations in static buffer, using offsets defined in luci model.
-class StaticMemoryManager : public IMemoryManager
-{
-public:
- StaticMemoryManager() = delete;
-
- explicit StaticMemoryManager(uint8_t *buffer_ptr) : _buffer_ptr(buffer_ptr)
- { /* Do nothing */
- }
-
- void allocate_memory(luci_interpreter::Tensor &tensor) final;
- void release_memory(luci_interpreter::Tensor &tensor) final;
-
-private:
- // Stores a pointer to the beginning of the allocated memory buffer.
- uint8_t *_buffer_ptr;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
-#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
-
-#include "luci_interpreter/MemoryManager.h"
-
-namespace luci_interpreter
-{
-// Memory Manager for using in kernels tests. This eliminates the need to manually delete the
-// allocated memory in tests. This mem_manager remembers all its allocations and in destructor
-// delete all allocations.
-class TestMemoryManager : public IMemoryManager
-{
-public:
- void allocate_memory(luci_interpreter::Tensor &tensor) final;
- void release_memory(luci_interpreter::Tensor &tensor) final;
-
- ~TestMemoryManager() override
- {
- for (auto allocation : allocations)
- {
- delete[] allocation;
- }
- }
-
-private:
- std::vector<uint8_t *> allocations;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_DATATYPE_H
-#define LUCI_INTERPRETER_CORE_DATATYPE_H
-
-#include <loco/IR/DataType.h>
-#include <loco/IR/DataTypeTraits.h>
-
-#include <cstddef>
-
-namespace luci_interpreter
-{
-
-using DataType = loco::DataType;
-
-template <DataType DT> using DataTypeImpl = loco::DataTypeImpl<DT>;
-
-inline size_t getDataTypeSize(DataType data_type) { return loco::size(data_type); }
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_DATATYPE_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_TENSOR_H
-#define LUCI_INTERPRETER_CORE_TENSOR_H
-
-#include "luci_interpreter/core/DataType.h"
-
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-namespace luci_interpreter
-{
-
-class Shape
-{
-public:
- explicit Shape(int rank) : _dims(rank, 0) {}
-
- Shape(std::initializer_list<int32_t> dims) : _dims(dims.begin(), dims.end()) {}
-
- int num_dims() const { return _dims.size(); }
-
- int32_t dim(int i) const
- {
- assert(i >= 0 && i < static_cast<int>(_dims.size()));
- return _dims[i];
- }
-
- int32_t &dim(int i)
- {
- assert(i >= 0 && i < static_cast<int>(_dims.size()));
- return _dims[i];
- }
-
- int32_t num_elements() const
- {
- int32_t result = 1;
- for (const int32_t dim : _dims)
- {
- result *= dim;
- }
- return result;
- }
-
- bool operator==(const Shape &other) const { return _dims == other._dims; }
-
- bool operator!=(const Shape &other) const { return !operator==(other); }
-
-private:
- std::vector<int32_t> _dims;
-};
-
-// Tensor affine quantization parameters.
-//
-// The relationship between real and quantized values:
-// real_value = (quantized_value - zero_point) * scale
-//
-// In per-tensor case, 'scale' and 'zero_point' are one element each.
-// In per-channel case, 'scale' and 'zero_point' are N elements each, where N is the size
-// of the quantized dimension.
-//
-// Note that due to historical and performance reasons, per-tensor quantization uses unsigned
-// integer types, while per-channel uses signed types assuming 'zero_point' == 0.
-struct AffineQuantization
-{
- std::vector<float> scale;
- std::vector<int32_t> zero_point;
- int32_t quantized_dimension;
-};
-
-class Tensor
-{
-public:
- Tensor(DataType element_type, Shape shape, AffineQuantization quantization, std::string name);
-
- DataType element_type() const { return _element_type; }
-
- const Shape &shape() const { return _shape; }
-
- float scale() const
- {
- assert(_quantization.scale.size() == 1);
- return _quantization.scale[0];
- }
-
- int32_t zero_point() const
- {
- assert(_quantization.zero_point.size() == 1);
- return _quantization.zero_point[0];
- }
-
- const std::vector<float> &scales() const { return _quantization.scale; }
-
- const std::vector<int32_t> &zero_points() const { return _quantization.zero_point; }
-
- int32_t quantized_dimension() const { return _quantization.quantized_dimension; }
-
- template <typename T> const T *data() const
- {
- static_assert(std::is_same<uint8_t, char>::value or
- std::is_same<uint8_t, unsigned char>::value);
- return reinterpret_cast<const T *>(_data);
- }
-
- template <typename T> T *data()
- {
- static_assert(std::is_same<uint8_t, char>::value or
- std::is_same<uint8_t, unsigned char>::value);
- return reinterpret_cast<T *>(_data);
- }
-
- const std::string &name() const { return _name; }
-
- void readData(void *data_ptr, size_t data_size) const;
-
- void writeData(const void *data_ptr, size_t data_size);
-
- void resize(const Shape &new_shape);
-
- void set_data_buffer(uint8_t *buffer)
- {
- if (buffer == nullptr)
- {
- _data_allocated = false;
- }
- else
- {
- _data_allocated = true;
- }
- _data = buffer;
- }
-
- bool is_observable() const { return _is_observable; }
-
- void set_observable(bool value) { _is_observable = value; }
-
- bool is_allocatable() const { return _is_allocatable; }
-
- void set_allocatable(bool value) { _is_allocatable = value; }
-
- bool is_data_allocated() const { return _data_allocated; }
-
- int32_t get_offset() const { return _offset; }
-
- void set_offset(int32_t offset) { _offset = offset; }
-
-private:
- DataType _element_type;
- Shape _shape;
- AffineQuantization _quantization;
- uint8_t *_data;
- std::string _name;
- bool _data_allocated;
- // Write of tensor is reported to registered Observers only if this tensor is observable
- // This is needed for tensors used in kernel implementation, but not present in original model.
- bool _is_observable = true;
- // Memory manager is called for tensor only if it is "allocatable".
- // Kernel configuration could disable allocation of some tensors if they are not needed for
- // particular operation.
- bool _is_allocatable = true;
- // Used by static memory manager.
- // Stores the offset from the beginning of the allocated memory buffer.
- int32_t _offset = -1;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_TENSOR_H
+++ /dev/null
-REGISTER_KERNEL(Add)
-REGISTER_KERNEL(ArgMax)
-REGISTER_KERNEL(AveragePool2D)
-REGISTER_KERNEL(BatchToSpaceND)
-REGISTER_KERNEL(Cast)
-REGISTER_KERNEL(Concatenation)
-REGISTER_KERNEL(Conv2D)
-REGISTER_KERNEL(DepthToSpace)
-REGISTER_KERNEL(DepthwiseConv2D)
-REGISTER_KERNEL(Dequantize)
-REGISTER_KERNEL(Div)
-REGISTER_KERNEL(Elu)
-REGISTER_KERNEL(Exp)
-REGISTER_KERNEL(ExpandDims)
-REGISTER_KERNEL(Fill)
-REGISTER_KERNEL(Floor)
-REGISTER_KERNEL(FloorDiv)
-REGISTER_KERNEL(Equal)
-REGISTER_KERNEL(FullyConnected)
-REGISTER_KERNEL(Greater)
-REGISTER_KERNEL(GreaterEqual)
-REGISTER_KERNEL(If)
-REGISTER_KERNEL(InstanceNorm)
-REGISTER_KERNEL(L2Normalize)
-REGISTER_KERNEL(L2Pool2D)
-REGISTER_KERNEL(LeakyRelu)
-REGISTER_KERNEL(Less)
-REGISTER_KERNEL(LessEqual)
-REGISTER_KERNEL(LogicalAnd)
-REGISTER_KERNEL(LogicalNot)
-REGISTER_KERNEL(LogicalOr)
-REGISTER_KERNEL(Logistic)
-REGISTER_KERNEL(Maximum)
-REGISTER_KERNEL(MaxPool2D)
-REGISTER_KERNEL(Minimum)
-REGISTER_KERNEL(MirrorPad)
-REGISTER_KERNEL(Mul)
-REGISTER_KERNEL(Neg)
-REGISTER_KERNEL(NotEqual)
-REGISTER_KERNEL(Pad)
-REGISTER_KERNEL(PadV2)
-REGISTER_KERNEL(PRelu)
-REGISTER_KERNEL(Quantize)
-REGISTER_KERNEL(Reshape)
-REGISTER_KERNEL(ResizeBilinear)
-REGISTER_KERNEL(ResizeNearestNeighbor)
-REGISTER_KERNEL(Rsqrt)
-REGISTER_KERNEL(Shape)
-REGISTER_KERNEL(Softmax)
-REGISTER_KERNEL(SpaceToBatchND)
-REGISTER_KERNEL(SpaceToDepth)
-REGISTER_KERNEL(StridedSlice)
-REGISTER_KERNEL(Sqrt)
-REGISTER_KERNEL(Square)
-REGISTER_KERNEL(SquaredDifference)
-REGISTER_KERNEL(Squeeze)
-REGISTER_KERNEL(Sub)
-REGISTER_KERNEL(SVDF)
-REGISTER_KERNEL(Tanh)
-REGISTER_KERNEL(Transpose)
-REGISTER_KERNEL(TransposeConv)
-REGISTER_KERNEL(While)
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
-#define LUCI_INTERPRETER_PAL_ARGMAX_H
-
-#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
- const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
- const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
- const tflite::RuntimeShape &unextended_output_shape, T *output_data)
-{
- tflite::reference_ops::BatchToSpaceND(
- unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
- unextended_input3_shape, crops_data, unextended_output_shape, output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
-#define LUCI_INTERPRETER_PAL_CONV2D_H
-
-#include <tensorflow/lite/kernels/internal/reference/conv.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
-#include <arm_nn_types.h>
-#include <arm_nnfunctions.h>
-
-namespace luci_interpreter_pal
-{
-static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape,
- const float *input_data, const tflite::RuntimeShape &filter_shape,
- const float *filter_data, const tflite::RuntimeShape &bias_shape,
- const float *bias_data, const tflite::RuntimeShape &output_shape,
- float *output_data, const tflite::RuntimeShape &scratchpad_shape,
- float *scratchpad_data)
-{
- (void)scratchpad_shape;
- (void)scratchpad_data;
- tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data,
- tflite::RuntimeShape(), nullptr);
-}
-
-static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape,
- const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
- const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
- const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
- uint8 *scratchpad_data)
-{
- (void)scratchpad_shape;
- (void)scratchpad_data;
- tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
- scratchpad_data, nullptr);
-}
-
-static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_t *mult,
- const int32_t *shifts, const tflite::RuntimeShape &input_shape,
- const int8 *input_data, const tflite::RuntimeShape &filter_shape,
- const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
- const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
- int8 *scratchpad_data)
-{
- if (scratchpad_data)
- {
- cmsis_nn_conv_params conv_params;
- conv_params.dilation.h = params.dilation_height_factor;
- conv_params.dilation.w = params.dilation_width_factor;
-
- assert(conv_params.dilation.h == 1);
- assert(conv_params.dilation.w == 1);
-
- conv_params.input_offset = params.input_offset;
- conv_params.output_offset = params.output_offset;
- conv_params.stride.h = params.stride_height;
- conv_params.stride.w = params.stride_width;
- conv_params.padding.h = params.padding_values.height;
- conv_params.padding.w = params.padding_values.width;
- conv_params.activation.min = params.quantized_activation_min;
- conv_params.activation.max = params.quantized_activation_max;
-
- cmsis_nn_per_channel_quant_params quant_params;
- quant_params.multiplier = const_cast<int32_t *>(mult);
- quant_params.shift = const_cast<int32_t *>(shifts);
-
- assert(conv_params.activation.min <= conv_params.activation.max);
- assert(input_shape.DimensionsCount() == 4);
- assert(filter_shape.DimensionsCount() == 4);
- assert(output_shape.DimensionsCount() == 4);
- const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
- const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
- const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
- if (bias_data)
- {
- assert(bias_shape.FlatSize() == output_depth);
- }
-
- cmsis_nn_dims input_dims;
- input_dims.n = batch_size;
- input_dims.h = input_shape.Dims(1);
- input_dims.w = input_shape.Dims(2);
- input_dims.c = input_depth;
-
- cmsis_nn_dims filter_dims;
- filter_dims.n = output_depth;
- filter_dims.h = filter_shape.Dims(1);
- filter_dims.w = filter_shape.Dims(2);
- filter_dims.c = input_depth;
-
- cmsis_nn_dims bias_dims;
- bias_dims.n = 1;
- bias_dims.h = 1;
- bias_dims.w = 1;
- bias_dims.c = output_depth;
-
- cmsis_nn_dims output_dims;
- output_dims.n = batch_size;
- output_dims.h = output_shape.Dims(1);
- output_dims.w = output_shape.Dims(2);
- output_dims.c = output_depth;
-
- cmsis_nn_context ctx;
- ctx.buf = scratchpad_data;
- ctx.size = scratchpad_shape.Dims(0);
-
- auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
- &filter_dims, filter_data, &bias_dims, bias_data,
- &output_dims, output_data);
- assert(res == ARM_MATH_SUCCESS);
- }
- else
- {
- tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
- filter_shape, filter_data, bias_shape, bias_data,
- output_shape, output_data);
- }
-}
-
-static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
- const luci_interpreter::DataType &input_data_type,
- const tflite::ConvParams ¶ms,
- const tflite::RuntimeShape &input_shape,
- const tflite::RuntimeShape &filter_shape,
- const tflite::RuntimeShape &output_shape)
-{
- cmsis_nn_conv_params conv_params;
- conv_params.dilation.h = params.dilation_height_factor;
- conv_params.dilation.w = params.dilation_width_factor;
-
- if (input_data_type == loco::DataType::S8 && conv_params.dilation.h == 1 &&
- conv_params.dilation.w == 1)
- {
- const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
- const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
- const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
- const int32_t filter_height = filter_shape.Dims(1);
- const int32_t filter_width = filter_shape.Dims(2);
- const int32_t output_height = output_shape.Dims(1);
- const int32_t output_width = output_shape.Dims(2);
-
- conv_params.input_offset = params.input_offset;
- conv_params.output_offset = params.output_offset;
- conv_params.stride.h = params.stride_height;
- conv_params.stride.w = params.stride_width;
- conv_params.padding.h = params.padding_values.height;
- conv_params.padding.w = params.padding_values.width;
-
- cmsis_nn_dims input_dims;
- input_dims.n = batches;
- input_dims.h = input_shape.Dims(1);
- input_dims.w = input_shape.Dims(2);
- input_dims.c = input_depth;
-
- cmsis_nn_dims filter_dims;
- filter_dims.n = output_depth;
- filter_dims.h = filter_height;
- filter_dims.w = filter_width;
- filter_dims.c = input_depth;
-
- cmsis_nn_dims output_dims;
- output_dims.n = batches;
- output_dims.h = output_height;
- output_dims.w = output_width;
- output_dims.c = output_depth;
-
- const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
- &filter_dims, &output_dims);
-
- luci_interpreter::Shape scratchpad_shape{buf_size};
- scratchpad->resize(scratchpad_shape);
- }
- else
- {
- scratchpad->set_allocatable(false);
- }
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_CONV2D_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
-#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
-
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
-#include <arm_nnfunctions.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-DepthwiseConvPerChannel(const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier,
- const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
- const T *input_data, const tflite::RuntimeShape &filter_shape,
- const T *filter_data, const tflite::RuntimeShape &bias_shape,
- const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
- T *output_data, const tflite::RuntimeShape &scratchpad_shape,
- T *scratchpad_data)
-{
- {
- // MARK: At this moment this operation is not supported
- assert(false && "DepthwiseConvPerChannel NYI");
- (void)params;
- (void)output_multiplier;
- (void)output_shift;
- (void)input_shape;
- (void)output_data;
- (void)input_data;
- (void)filter_shape;
- (void)filter_data;
- (void)bias_shape;
- (void)bias_data;
- (void)output_shape;
- (void)output_data;
- (void)scratchpad_shape;
- (void)scratchpad_data;
- }
-}
-
-template <>
-inline void DepthwiseConvPerChannel<int8_t>(
- const tflite::DepthwiseParams ¶ms, const int32_t *output_multiplier,
- const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
- const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
- const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
- const tflite::RuntimeShape &output_shape, int8_t *output_data,
- const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
-{
- if (scratchpad_data)
- {
- cmsis_nn_dw_conv_params dw_conv_params;
- dw_conv_params.dilation.h = params.dilation_height_factor;
- dw_conv_params.dilation.w = params.dilation_width_factor;
- assert(dw_conv_params.dilation.h == 1);
- assert(dw_conv_params.dilation.w == 1);
-
- dw_conv_params.input_offset = params.input_offset;
- dw_conv_params.output_offset = params.output_offset;
- dw_conv_params.stride.h = params.stride_height;
- dw_conv_params.stride.w = params.stride_width;
- dw_conv_params.padding.h = params.padding_values.height;
- dw_conv_params.padding.w = params.padding_values.width;
-
- dw_conv_params.activation.min = params.quantized_activation_min;
- dw_conv_params.activation.max = params.quantized_activation_max;
- dw_conv_params.ch_mult = params.depth_multiplier;
-
- cmsis_nn_per_channel_quant_params quant_params;
- int32_t output_multiplier = params.output_multiplier;
- int32_t output_shift = params.output_shift;
-
- quant_params.multiplier = &output_multiplier;
- quant_params.shift = &output_shift;
-
- assert(dw_conv_params.activation.min <= dw_conv_params.activation.max);
- const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
- const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
- if (bias_data)
- {
- assert(bias_shape.FlatSize() == output_depth);
- }
-
- cmsis_nn_dims input_dims;
- input_dims.n = batch_size;
- input_dims.h = input_shape.Dims(1);
- input_dims.w = input_shape.Dims(2);
- input_dims.c = input_shape.Dims(3);
-
- cmsis_nn_dims filter_dims;
- filter_dims.n = filter_shape.Dims(0);
- filter_dims.h = filter_shape.Dims(1);
- filter_dims.w = filter_shape.Dims(2);
- filter_dims.c = output_depth;
-
- cmsis_nn_dims bias_dims;
- bias_dims.n = 1;
- bias_dims.h = 1;
- bias_dims.w = 1;
- bias_dims.c = output_depth;
-
- cmsis_nn_dims output_dims;
- output_dims.n = batch_size;
- output_dims.h = output_shape.Dims(1);
- output_dims.w = output_shape.Dims(2);
- output_dims.c = output_depth;
-
- cmsis_nn_context ctx;
- ctx.buf = scratchpad_data;
- ctx.size = scratchpad_shape.Dims(0);
-
- auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims,
- input_data, &filter_dims, filter_data, &bias_dims,
- bias_data, &output_dims, output_data);
- assert(res == ARM_MATH_SUCCESS);
- }
- else
- {
- tflite::reference_integer_ops::DepthwiseConvPerChannel(
- params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data);
- }
-}
-
-static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
- const tflite::DepthwiseParams ¶ms,
- const luci_interpreter::DataType &input_data_type,
- const tflite::RuntimeShape &input_shape,
- const tflite::RuntimeShape &filter_shape,
- const tflite::RuntimeShape &output_shape)
-{
- cmsis_nn_dw_conv_params dw_conv_params;
- dw_conv_params.dilation.h = params.dilation_height_factor;
- dw_conv_params.dilation.w = params.dilation_width_factor;
-
- if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 &&
- dw_conv_params.dilation.w == 1)
- {
- const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
- const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
-
- cmsis_nn_dims input_dims;
- input_dims.n = batch_size;
- input_dims.h = input_shape.Dims(1);
- input_dims.w = input_shape.Dims(2);
- input_dims.c = input_shape.Dims(3);
-
- cmsis_nn_dims filter_dims;
- filter_dims.n = filter_shape.Dims(0);
- filter_dims.h = filter_shape.Dims(1);
- filter_dims.w = filter_shape.Dims(2);
- filter_dims.c = output_depth;
-
- cmsis_nn_dims output_dims;
- output_dims.n = batch_size;
- output_dims.h = output_shape.Dims(1);
- output_dims.w = output_shape.Dims(2);
- output_dims.c = output_depth;
-
- const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
- &dw_conv_params, &input_dims, &filter_dims, &output_dims);
-
- auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
-
- luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
- scratchpad->resize(scratchpad_shape);
- }
- else
- {
- scratchpad->set_allocatable(false);
- }
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
-#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
-
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-
-namespace luci_interpreter_pal
-{
-
-template <typename T>
-static inline void Dequantize(tflite::DequantizationParams ¶ms,
- const tflite::RuntimeShape &input_shape, const T *input_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
-{
- tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
- output_data);
-}
-
-static inline void Dequantize(tflite::DequantizationParams ¶ms,
- const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
-{
- tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
-#define LUCI_INTERPRETER_PAL_QUANTIZE_H
-
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void Quantize(tflite::QuantizationParams ¶ms,
- const tflite::RuntimeShape &input_shape, const float *input_data,
- const tflite::RuntimeShape &output_shape, T *output_data)
-{
- tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
-}
-
-template <typename Input, typename Output>
-static inline void Requantize(const Input *input_data, int32_t size,
- int32_t effective_scale_multiplier, int32_t effective_scale_shift,
- int32_t input_zero_point, int32_t output_zero_point,
- Output *output_data)
-{
- tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
- effective_scale_shift, input_zero_point, output_zero_point,
- output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
-#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
-
-#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
- const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
- const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
- const tflite::RuntimeShape &unextended_output_shape, T *output_data)
-{
- tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
- output_size_shape, output_size_data,
- unextended_output_shape, output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
-#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
-
-#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
- const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
- const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
- const tflite::RuntimeShape &unextended_output_shape, T *output_data)
-{
- tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
- output_size_shape, output_size_data,
- unextended_output_shape, output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_SVDF_H
-#define LUCI_INTERPRETER_PAL_SVDF_H
-
-#include <arm_nn_types.h>
-#include <arm_nnfunctions.h>
-
-namespace luci_interpreter_pal
-{
-static inline void
-IntegerSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape,
- const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
- const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
- const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
- const int32_t *bias_data, int16_t *activation_state_data,
- const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
- int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
- int scale_2_b, int32_t input_zp, int32_t output_zp)
-{
- const int32_t rank = params.rank;
- const int32_t batch_size = input_shape.Dims(0);
- const int32_t num_filters = weight_feature_shape.Dims(0);
- const int32_t memory_size = weight_time_shape.Dims(1);
-
- cmsis_nn_dims input_dims;
- input_dims.n = input_shape.Dims(0);
- input_dims.h = input_shape.Dims(1);
-
- cmsis_nn_dims weights_feature_dims;
- weights_feature_dims.n = weight_feature_shape.Dims(0);
- weights_feature_dims.h = weight_feature_shape.Dims(1);
-
- cmsis_nn_dims weights_time_dims;
- weights_time_dims.n = weight_time_shape.Dims(0);
- weights_time_dims.h = weight_time_shape.Dims(1);
-
- cmsis_nn_dims bias_dims;
- bias_dims.n = bias_shape.Dims(0);
-
- cmsis_nn_dims state_dims;
- state_dims.n = batch_size;
- state_dims.h = memory_size * num_filters;
-
- cmsis_nn_dims output_dims;
- output_dims.n = output_shape.Dims(0);
- output_dims.h = output_shape.Dims(1);
-
- cmsis_nn_svdf_params svdf_params;
- svdf_params.rank = params.rank;
- svdf_params.input_offset = input_zp;
- svdf_params.output_offset = output_zp;
-
- svdf_params.input_activation.min = INT16_MIN;
- svdf_params.input_activation.max = INT16_MAX;
-
- svdf_params.output_activation.min = INT8_MIN;
- svdf_params.output_activation.max = INT8_MAX;
-
- cmsis_nn_per_tensor_quant_params in_quant_params;
- in_quant_params.multiplier = scale_1_a;
- in_quant_params.shift = scale_1_b;
-
- cmsis_nn_per_tensor_quant_params out_quant_params;
- out_quant_params.multiplier = scale_2_a;
- out_quant_params.shift = scale_2_b;
-
- cmsis_nn_context scratch_ctx;
- scratch_ctx.buf = scratchpad_data;
-
- cmsis_nn_context scratch_output_ctx;
- scratch_output_ctx.buf = output_temp_data;
-
- arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
- &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims,
- weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data,
- &output_dims, output_data);
-}
-static inline void
-FloatSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape,
- const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
- const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
- const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
- const float *bias_data, float *scratchpad_data, float *activation_state_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
-{
- const int32_t rank = params.rank;
- const int32_t batch_size = input_shape.Dims(0);
- const int32_t input_size = input_shape.Dims(1);
- const int32_t num_filters = weight_feature_shape.Dims(0);
- const int32_t num_units = num_filters / rank;
- const int32_t memory_size = weight_time_shape.Dims(1);
-
- // Left shift the activation_state.
- {
- float *new_state_start = activation_state_data;
- const float *old_state_start = activation_state_data + 1;
- const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
- while (old_state_start != old_state_end)
- {
- *new_state_start++ = *old_state_start++;
- }
- }
-
- // Note: no need to clear the latest activation, matmul is not accumulative.
-
- // Compute conv1d(inputs, weights_feature).
- // The activation_state's rightmost column is used to save current cycle
- // activation. This is achieved by starting at state_ptr[memory_size - 1] and
- // having the stride equal to memory_size.
-
- // Perform batched matrix vector multiply operation:
- {
- const float *matrix = weight_feature_data;
- const float *vector = input_data;
- float *result = &activation_state_data[memory_size - 1];
- float *result_in_batch = result;
- for (int i = 0; i < batch_size; ++i)
- {
- const float *matrix_ptr = matrix;
- for (int j = 0; j < num_filters; ++j)
- {
- float dot_prod = 0.0f;
- const float *vector_in_batch = vector + i * input_size;
- for (int k = 0; k < input_size; ++k)
- {
- dot_prod += *matrix_ptr++ * *vector_in_batch++;
- }
- *result_in_batch = dot_prod;
- result_in_batch += memory_size;
- }
- }
- }
-
- tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
- batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
- params.activation, activation_state_data, scratchpad_data, output_data);
-}
-
-static inline void SetupScratchpadTensor(
- const luci_interpreter::DataType &input_data_type,
- const luci_interpreter::DataType &weight_feature_data_type,
- luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
- luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
- luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
- const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
- const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
-{
- if (input_data_type == loco::DataType::FLOAT32 &&
- (weight_feature_data_type == loco::DataType::S8 ||
- weight_feature_data_type == loco::DataType::U8))
- {
- (void)input_shape;
- (void)weight_time_shape;
- (void)scratchpad_3;
- (void)scratchpad_4;
- (void)scratchpad_5;
- (void)scratchpad_6;
-
- throw std::runtime_error("Hybrid type is not supported for cmsisnn");
- }
-
- // Resize scratchpad_1 tensor
- scratchpad_1->resize({batch_size, num_filters});
-
- if (input_data_type == loco::DataType::S8)
- {
- // Resize scratchpad_2 for full_integer op
- scratchpad_2->resize({batch_size, num_units});
- }
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_SVDF_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
-#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
-
-#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-SpaceToBatchND(const tflite::SpaceToBatchParams ¶ms,
- const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
- const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
- const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
- const tflite::RuntimeShape &unextended_output_shape, T *output_data)
-{
- tflite::reference_ops::SpaceToBatchND(
- params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
- unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+++ /dev/null
-macro(initialize_pal)
- nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
- nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
- nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
- nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
- nnas_find_package(CMSISSource EXACT 5.8.0 QUIET)
-
- if (NOT TensorFlowSource_FOUND)
- message(STATUS "Skipping luci-interpreter: TensorFlow not found")
- return()
- endif ()
-
- if (NOT TensorFlowGEMMLowpSource_FOUND)
- message(STATUS "Skipping luci-interpreter: gemmlowp not found")
- return()
- endif ()
-
- if (NOT TensorFlowEigenSource_FOUND)
- message(STATUS "Skipping luci-interpreter: Eigen not found")
- return()
- endif ()
-
- if (NOT TensorFlowRuySource_FOUND)
- message(STATUS "Skipping luci-interpreter: Ruy not found")
- return()
- endif ()
-
- if (NOT CMSISSource_FOUND)
- message(STATUS "Skipping luci-interpreter: CMSISSource not found")
- return()
- endif ()
-
- set(PAL_INITIALIZED TRUE)
-endmacro()
-
-macro(add_pal_to_target TGT)
- target_include_directories(${TGT} PRIVATE "${PAL}")
- target_include_directories(${TGT} PRIVATE
- "${TensorFlowRuySource_DIR}"
- "${TensorFlowGEMMLowpSource_DIR}"
- "${TensorFlowEigenSource_DIR}"
- "${TensorFlowSource_DIR}")
- target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
-
- file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c")
- list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
- ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
- ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
- add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
- set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON)
- target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
- "${TensorFlowRuySource_DIR}"
- "${TensorFlowGEMMLowpSource_DIR}"
- "${TensorFlowEigenSource_DIR}"
- "${TensorFlowSource_DIR}"
- )
-
- add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
- target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
- "${CMSISSource_DIR}/CMSIS/NN/Include"
- "${CMSISSource_DIR}/CMSIS/DSP/Include"
- "${CMSISSource_DIR}/CMSIS/Core/Include")
-
- target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal)
-endmacro()
+++ /dev/null
-REGISTER_KERNEL(Add)
-REGISTER_KERNEL(ArgMax)
-REGISTER_KERNEL(AveragePool2D)
-REGISTER_KERNEL(BatchMatMul)
-REGISTER_KERNEL(BatchToSpaceND)
-REGISTER_KERNEL(Cast)
-REGISTER_KERNEL(Concatenation)
-REGISTER_KERNEL(Conv2D)
-REGISTER_KERNEL(DepthToSpace)
-REGISTER_KERNEL(DepthwiseConv2D)
-REGISTER_KERNEL(Dequantize)
-REGISTER_KERNEL(Div)
-REGISTER_KERNEL(Elu)
-REGISTER_KERNEL(Exp)
-REGISTER_KERNEL(ExpandDims)
-REGISTER_KERNEL(Fill)
-REGISTER_KERNEL(Floor)
-REGISTER_KERNEL(FloorDiv)
-REGISTER_KERNEL(Equal)
-REGISTER_KERNEL(FullyConnected)
-REGISTER_KERNEL(Gather)
-REGISTER_KERNEL(Greater)
-REGISTER_KERNEL(GreaterEqual)
-REGISTER_KERNEL(If)
-REGISTER_KERNEL(InstanceNorm)
-REGISTER_KERNEL(L2Normalize)
-REGISTER_KERNEL(L2Pool2D)
-REGISTER_KERNEL(LeakyRelu)
-REGISTER_KERNEL(Less)
-REGISTER_KERNEL(LessEqual)
-REGISTER_KERNEL(LocalResponseNormalization)
-REGISTER_KERNEL(LogicalAnd)
-REGISTER_KERNEL(LogicalNot)
-REGISTER_KERNEL(LogicalOr)
-REGISTER_KERNEL(Logistic)
-REGISTER_KERNEL(LogSoftmax)
-REGISTER_KERNEL(Maximum)
-REGISTER_KERNEL(MaxPool2D)
-REGISTER_KERNEL(Mean)
-REGISTER_KERNEL(Minimum)
-REGISTER_KERNEL(MirrorPad)
-REGISTER_KERNEL(Mul)
-REGISTER_KERNEL(Neg)
-REGISTER_KERNEL(NotEqual)
-REGISTER_KERNEL(OneHot)
-REGISTER_KERNEL(Pack)
-REGISTER_KERNEL(Pad)
-REGISTER_KERNEL(PadV2)
-REGISTER_KERNEL(Pow)
-REGISTER_KERNEL(PRelu)
-REGISTER_KERNEL(Quantize)
-REGISTER_KERNEL(Relu)
-REGISTER_KERNEL(Relu6)
-REGISTER_KERNEL(Reshape)
-REGISTER_KERNEL(ResizeBilinear)
-REGISTER_KERNEL(ResizeNearestNeighbor)
-REGISTER_KERNEL(ReverseV2)
-REGISTER_KERNEL(Rsqrt)
-REGISTER_KERNEL(Shape)
-REGISTER_KERNEL(Slice)
-REGISTER_KERNEL(Softmax)
-REGISTER_KERNEL(SpaceToBatchND)
-REGISTER_KERNEL(SpaceToDepth)
-REGISTER_KERNEL(Split)
-REGISTER_KERNEL(SplitV)
-REGISTER_KERNEL(StridedSlice)
-REGISTER_KERNEL(Sqrt)
-REGISTER_KERNEL(Square)
-REGISTER_KERNEL(SquaredDifference)
-REGISTER_KERNEL(Squeeze)
-REGISTER_KERNEL(Sub)
-REGISTER_KERNEL(SVDF)
-REGISTER_KERNEL(Tanh)
-REGISTER_KERNEL(Transpose)
-REGISTER_KERNEL(TransposeConv)
-REGISTER_KERNEL(Unpack)
-REGISTER_KERNEL(While)
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_SVDF_H
-#define LUCI_INTERPRETER_PAL_SVDF_H
-
-#include <tensorflow/lite/kernels/internal/reference/svdf.h>
-
-namespace luci_interpreter_pal
-{
-static inline void
-IntegerSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape,
- const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
- const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
- const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
- const int32_t *bias_data, int16_t *activation_state_data,
- const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
- int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
- int scale_2_b, int32_t input_zp, int32_t output_zp)
-{
- tflite::reference_ops::EvalIntegerSVDF(¶ms, input_shape, input_data, weight_feature_shape,
- weight_feature_data, weight_time_shape, weight_time_data,
- bias_shape, bias_data, activation_state_data, output_shape,
- output_data, scratchpad_data, output_temp_data, scale_1_a,
- scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp);
-}
-static inline void
-FloatSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape,
- const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
- const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
- const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
- const float *bias_data, float *scratchpad_data, float *activation_state_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
-{
- tflite::reference_ops::EvalFloatSVDF(¶ms, input_shape, input_data, weight_feature_shape,
- weight_feature_data, weight_time_shape, weight_time_data,
- bias_shape, bias_data, scratchpad_data,
- activation_state_data, output_shape, output_data);
-}
-
-static inline void SetupScratchpadTensor(
- const luci_interpreter::DataType &input_data_type,
- const luci_interpreter::DataType &weight_feature_data_type,
- luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
- luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
- luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
- const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
- const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
-{
-
- if (input_data_type == loco::DataType::FLOAT32 &&
- (weight_feature_data_type == loco::DataType::S8 ||
- weight_feature_data_type == loco::DataType::U8))
- {
- (void)input_shape;
- (void)weight_time_shape;
- (void)scratchpad_3;
- (void)scratchpad_4;
- (void)scratchpad_5;
- (void)scratchpad_6;
-
- throw std::runtime_error("Hybrid type is not currently supported for linux platform");
- }
-
- // Resize scratchpad_1 tensor
- scratchpad_1->resize({batch_size, num_filters});
-
- if (input_data_type == loco::DataType::S8)
- {
- // Resize scratchpad_2 for full_integer op
- scratchpad_2->resize({batch_size, num_units});
- }
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_SVDF_H
+++ /dev/null
-macro(initialize_pal)
- nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
- nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
- nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
- nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
-
- if (NOT TensorFlowSource_FOUND)
- message(STATUS "Skipping luci-interpreter: TensorFlow not found")
- return()
- endif ()
-
- if (NOT TensorFlowGEMMLowpSource_FOUND)
- message(STATUS "Skipping luci-interpreter: gemmlowp not found")
- return()
- endif ()
-
- if (NOT TensorFlowEigenSource_FOUND)
- message(STATUS "Skipping luci-interpreter: Eigen not found")
- return()
- endif ()
-
- if (NOT TensorFlowRuySource_FOUND)
- message(STATUS "Skipping luci-interpreter: Ruy not found")
- return()
- endif ()
-
- find_package(Threads REQUIRED)
-
- set(PAL_INITIALIZED TRUE)
-endmacro()
-
-macro(add_pal_to_target TGT)
- target_include_directories(${TGT} PRIVATE "${PAL}")
- target_include_directories(${TGT} SYSTEM PRIVATE
- "${TensorFlowRuySource_DIR}"
- "${TensorFlowGEMMLowpSource_DIR}"
- "${TensorFlowEigenSource_DIR}"
- "${TensorFlowSource_DIR}")
- target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
-
- # TODO put it back, I changed my mind.
- # instead add sources with visitors in this library
- set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
- ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
- ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
-
- if(BUILD_ARM32_NEON)
- # NOTE may need to revise this list for version upgrade
- set(PAL_SOURCES ${PAL_SOURCES}
- ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
- ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
- ${TensorFlowRuySource_DIR}/ruy/allocator.cc
- ${TensorFlowRuySource_DIR}/ruy/block_map.cc
- ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
- ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
- ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
- ${TensorFlowRuySource_DIR}/ruy/ctx.cc
- ${TensorFlowRuySource_DIR}/ruy/denormal.cc
- ${TensorFlowRuySource_DIR}/ruy/frontend.cc
- ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
- ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
- ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
- ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
- ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
- ${TensorFlowRuySource_DIR}/ruy/trmul.cc
- ${TensorFlowRuySource_DIR}/ruy/tune.cc
- ${TensorFlowRuySource_DIR}/ruy/wait.cc
- ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc
- )
- endif(BUILD_ARM32_NEON)
-
- add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES})
- set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
- target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE
- "${TensorFlowRuySource_DIR}"
- "${TensorFlowGEMMLowpSource_DIR}"
- "${TensorFlowEigenSource_DIR}"
- "${TensorFlowSource_DIR}"
- )
-
- target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal)
-endmacro()
+++ /dev/null
-REGISTER_KERNEL(Add)
-REGISTER_KERNEL(ArgMax)
-REGISTER_KERNEL(AveragePool2D)
-REGISTER_KERNEL(BatchToSpaceND)
-REGISTER_KERNEL(Cast)
-REGISTER_KERNEL(Concatenation)
-REGISTER_KERNEL(Conv2D)
-REGISTER_KERNEL(DepthToSpace)
-REGISTER_KERNEL(DepthwiseConv2D)
-REGISTER_KERNEL(Dequantize)
-REGISTER_KERNEL(Div)
-REGISTER_KERNEL(Elu)
-REGISTER_KERNEL(Exp)
-REGISTER_KERNEL(ExpandDims)
-REGISTER_KERNEL(Fill)
-REGISTER_KERNEL(Floor)
-REGISTER_KERNEL(FloorDiv)
-REGISTER_KERNEL(Equal)
-REGISTER_KERNEL(FullyConnected)
-REGISTER_KERNEL(Greater)
-REGISTER_KERNEL(GreaterEqual)
-REGISTER_KERNEL(If)
-REGISTER_KERNEL(InstanceNorm)
-REGISTER_KERNEL(L2Normalize)
-REGISTER_KERNEL(L2Pool2D)
-REGISTER_KERNEL(LeakyRelu)
-REGISTER_KERNEL(Less)
-REGISTER_KERNEL(LessEqual)
-REGISTER_KERNEL(LogicalAnd)
-REGISTER_KERNEL(LogicalNot)
-REGISTER_KERNEL(LogicalOr)
-REGISTER_KERNEL(Logistic)
-REGISTER_KERNEL(Maximum)
-REGISTER_KERNEL(MaxPool2D)
-REGISTER_KERNEL(Minimum)
-REGISTER_KERNEL(MirrorPad)
-REGISTER_KERNEL(Mul)
-REGISTER_KERNEL(Neg)
-REGISTER_KERNEL(NotEqual)
-REGISTER_KERNEL(Pad)
-REGISTER_KERNEL(PadV2)
-REGISTER_KERNEL(PRelu)
-REGISTER_KERNEL(Quantize)
-REGISTER_KERNEL(Reshape)
-REGISTER_KERNEL(ResizeBilinear)
-REGISTER_KERNEL(ResizeNearestNeighbor)
-REGISTER_KERNEL(Rsqrt)
-REGISTER_KERNEL(Shape)
-REGISTER_KERNEL(Softmax)
-REGISTER_KERNEL(SpaceToBatchND)
-REGISTER_KERNEL(SpaceToDepth)
-REGISTER_KERNEL(StridedSlice)
-REGISTER_KERNEL(Sqrt)
-REGISTER_KERNEL(Square)
-REGISTER_KERNEL(SquaredDifference)
-REGISTER_KERNEL(Squeeze)
-REGISTER_KERNEL(Sub)
-REGISTER_KERNEL(SVDF)
-REGISTER_KERNEL(Tanh)
-REGISTER_KERNEL(Transpose)
-REGISTER_KERNEL(TransposeConv)
-REGISTER_KERNEL(While)
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
-#define LUCI_INTERPRETER_PAL_ARGMAX_H
-
-#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void
-BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
- const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
- const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
- const tflite::RuntimeShape &unextended_output_shape, T *output_data)
-{
- tflite::reference_ops::BatchToSpaceND(
- unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
- unextended_input3_shape, crops_data, unextended_output_shape, output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
-#define LUCI_INTERPRETER_PAL_CONV2D_H
-
-#include <tensorflow/lite/kernels/internal/reference/conv.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
-
-namespace luci_interpreter_pal
-{
-static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape,
- const float *input_data, const tflite::RuntimeShape &filter_shape,
- const float *filter_data, const tflite::RuntimeShape &bias_shape,
- const float *bias_data, const tflite::RuntimeShape &output_shape,
- float *output_data, const tflite::RuntimeShape &scratchpad_shape,
- float *scratchpad_data)
-{
- (void)scratchpad_shape;
- (void)scratchpad_data;
- tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data,
- tflite::RuntimeShape(), nullptr);
-}
-
-static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape,
- const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
- const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
- const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
- uint8 *scratchpad_data)
-{
- (void)scratchpad_shape;
- (void)scratchpad_data;
- tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
- scratchpad_data, nullptr);
-}
-
-static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_t *mult,
- const int32_t *shifts, const tflite::RuntimeShape &input_shape,
- const int8 *input_data, const tflite::RuntimeShape &filter_shape,
- const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
- const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
- int8 *scratchpad_data)
-{
- (void)scratchpad_shape;
- (void)scratchpad_data;
- tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
- filter_shape, filter_data, bias_shape, bias_data,
- output_shape, output_data);
-}
-
-static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
- const luci_interpreter::DataType &input_data_type,
- const tflite::ConvParams ¶ms,
- const tflite::RuntimeShape &input_shape,
- const tflite::RuntimeShape &filter_shape,
- const tflite::RuntimeShape &output_shape)
-{
- (void)input_data_type;
- (void)params;
- (void)input_shape;
- (void)filter_shape;
- (void)output_shape;
- scratchpad->set_allocatable(false);
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_CONV2D_H
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
-#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
-
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-
-namespace luci_interpreter_pal
-{
-
-template <typename T>
-static inline void Dequantize(tflite::DequantizationParams ¶ms,
- const tflite::RuntimeShape &input_shape, const T *input_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
-{
- tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
- output_data);
-}
-
-static inline void Dequantize(tflite::DequantizationParams ¶ms,
- const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
-{
- tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
-#define LUCI_INTERPRETER_PAL_QUANTIZE_H
-
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-
-namespace luci_interpreter_pal
-{
-template <typename T>
-static inline void Quantize(tflite::QuantizationParams ¶ms,
- const tflite::RuntimeShape &input_shape, const float *input_data,
- const tflite::RuntimeShape &output_shape, T *output_data)
-{
- tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
-}
-
-template <typename Input, typename Output>
-static inline void Requantize(const Input *input_data, int32_t size,
- int32_t effective_scale_multiplier, int32_t effective_scale_shift,
- int32_t input_zero_point, int32_t output_zero_point,
- Output *output_data)
-{
- tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
- effective_scale_shift, input_zero_point, output_zero_point,
- output_data);
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_SVDF_H
-#define LUCI_INTERPRETER_PAL_SVDF_H
-
-#include <tensorflow/lite/kernels/internal/reference/svdf.h>
-
-namespace luci_interpreter_pal
-{
-static inline void
-IntegerSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape,
- const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
- const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
- const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
- const int32_t *bias_data, int16_t *activation_state_data,
- const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
- int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
- int scale_2_b, int32_t input_zp, int32_t output_zp)
-{
- const int n_rank = params.rank;
- const int n_batch = input_shape.Dims(0);
- const int n_input = input_shape.Dims(1);
- const int n_filter = weight_feature_shape.Dims(0);
- const int n_unit = n_filter / n_rank;
- const int n_memory = weight_time_shape.Dims(1);
-
- // Left shift the activation_state.
- {
- int16_t *new_state_start = activation_state_data;
- const int16_t *old_state_start = activation_state_data + 1;
- const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory;
- while (old_state_start != old_state_end)
- {
- *new_state_start++ = *old_state_start++;
- }
- }
-
- // Note: no need to clear the latest activation, matmul is not accumulative.
-
- // Feature matmul.
- {
- const int32_t output_max = std::numeric_limits<int16_t>::max();
- const int32_t output_min = std::numeric_limits<int16_t>::min();
- int16_t *result_in_batch = activation_state_data + (n_memory - 1);
- for (int b = 0; b < n_batch; b++)
- {
- const int8_t *matrix_ptr = weight_feature_data;
- for (int r = 0; r < n_filter; r++)
- {
- int32_t dot_prod = 0;
- const int8_t *vector_in_batch = input_data + b * n_input;
- for (int c = 0; c < n_input; c++)
- {
- dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
- }
- dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
- dot_prod = std::min(std::max(output_min, dot_prod), output_max);
- // This assumes state is symmetrically quantized. Otherwise last bit of
- // state should be initialized to its zero point and accumulate the
- // dot_prod.
- // Equivalent as the following:
- // result_in_batch = zero point, which happens to be zero.
- // result_in_batch += dot_prod_56.
- *result_in_batch = dot_prod;
- result_in_batch += n_memory;
- }
- }
- }
-
- // Time.
- {
- for (int b = 0; b < n_batch; ++b)
- {
- int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
-
- // Perform batched vector dot product:
- const int16_t *vector1_ptr = weight_time_data;
- const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
-
- for (int i = 0; i < n_filter; i++)
- {
- *scratch_ptr_batch = 0;
- for (int j = 0; j < n_memory; j++)
- {
- *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
- }
- scratch_ptr_batch++;
- }
- }
- }
-
- // Reduce, add bias, rescale, activation.
- {
- // Add bias.
- if (bias_data)
- {
- // Vector batch assign:
- for (int i = 0; i < n_batch; ++i)
- {
- int32_t *output_ptr = output_temp_data + i * n_unit;
- const int32_t *bias_ptr = bias_data;
- for (int j = 0; j < n_unit; ++j)
- {
- *output_ptr++ = *bias_ptr++;
- }
- }
- }
- else
- {
- int32_t *output_ptr = output_temp_data;
- for (int i = 0; i < n_batch * n_unit; ++i)
- {
- *output_ptr++ = 0;
- }
- }
-
- // Reduce.
- for (int b = 0; b < n_batch; ++b)
- {
- int32_t *output_temp_ptr = output_temp_data + b * n_unit;
- int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
-
- // Reduction sum vector
- for (int i = 0; i < n_unit; ++i)
- {
- for (int j = 0; j < n_rank; ++j)
- {
- output_temp_ptr[i] += *scratch_ptr_batch++;
- }
- }
- }
-
- // Rescale.
- const int32_t output_max = std::numeric_limits<int8_t>::max();
- const int32_t output_min = std::numeric_limits<int8_t>::min();
- for (int i = 0; i < n_batch * n_unit; ++i)
- {
- int32_t x1 = output_temp_data[i];
- int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
- int32_t x3 = x2 + output_zp;
- int32_t x4 = std::min(std::max(output_min, x3), output_max);
- output_data[i] = static_cast<int8_t>(x4);
- }
- }
-}
-static inline void
-FloatSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape,
- const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
- const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
- const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
- const float *bias_data, float *scratchpad_data, float *activation_state_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
-{
- const int32_t rank = params.rank;
- const int32_t batch_size = input_shape.Dims(0);
- const int32_t input_size = input_shape.Dims(1);
- const int32_t num_filters = weight_feature_shape.Dims(0);
- const int32_t num_units = num_filters / rank;
- const int32_t memory_size = weight_time_shape.Dims(1);
-
- // Left shift the activation_state.
- {
- float *new_state_start = activation_state_data;
- const float *old_state_start = activation_state_data + 1;
- const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
- while (old_state_start != old_state_end)
- {
- *new_state_start++ = *old_state_start++;
- }
- }
-
- // Note: no need to clear the latest activation, matmul is not accumulative.
-
- // Compute conv1d(inputs, weights_feature).
- // The activation_state's rightmost column is used to save current cycle
- // activation. This is achieved by starting at state_ptr[memory_size - 1] and
- // having the stride equal to memory_size.
-
- // Perform batched matrix vector multiply operation:
- {
- const float *matrix = weight_feature_data;
- const float *vector = input_data;
- float *result = &activation_state_data[memory_size - 1];
- float *result_in_batch = result;
- for (int i = 0; i < batch_size; ++i)
- {
- const float *matrix_ptr = matrix;
- for (int j = 0; j < num_filters; ++j)
- {
- float dot_prod = 0.0f;
- const float *vector_in_batch = vector + i * input_size;
- for (int k = 0; k < input_size; ++k)
- {
- dot_prod += *matrix_ptr++ * *vector_in_batch++;
- }
- *result_in_batch = dot_prod;
- result_in_batch += memory_size;
- }
- }
- }
-
- tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
- batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
- params.activation, activation_state_data, scratchpad_data, output_data);
-}
-
-static inline void SetupScratchpadTensor(
- const luci_interpreter::DataType &input_data_type,
- const luci_interpreter::DataType &weight_feature_data_type,
- luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
- luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
- luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
- const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
- const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
-{
-
- if (input_data_type == loco::DataType::FLOAT32 &&
- (weight_feature_data_type == loco::DataType::S8 ||
- weight_feature_data_type == loco::DataType::U8))
- {
- (void)input_shape;
- (void)weight_time_shape;
- (void)scratchpad_3;
- (void)scratchpad_4;
- (void)scratchpad_5;
- (void)scratchpad_6;
-
- throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
- }
-
- // Resize scratchpad_1 tensor
- scratchpad_1->resize({batch_size, num_filters});
-
- if (input_data_type == loco::DataType::S8)
- {
- // Resize scratchpad_2 for full_integer op
- scratchpad_2->resize({batch_size, num_units});
- }
-}
-
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_SVDF_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
-#define LUCI_INTERPRETER_PAL_SOFTMAX_H
-
-#include <tensorflow/lite/kernels/internal/reference/softmax.h>
-
-namespace luci_interpreter_pal
-{
-// Stub for the mcu PAL: no lookup table is populated here.
-// All three arguments are accepted and deliberately ignored.
-static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
- float beta)
-{
- // Do nothing for mcu
- (void)data;
- (void)input_scale;
- (void)beta;
-}
-
-// Fills the fixed-point fields of `params` (input_multiplier, input_left_shift, diff_min)
-// from `input_scale` and `beta`, using 5 scaled-diff integer bits.
-static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
-{
- static const int kScaledDiffIntegerBits = 5;
-
- int32 multiplier;
- int left_shift;
- tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits, &multiplier,
- &left_shift);
-
- params->input_multiplier = multiplier;
- params->input_left_shift = left_shift;
-
- // diff_min is the negated input radius computed for the shift that was just stored.
- const auto radius =
- tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
- params->diff_min = -radius;
-}
-
-// Generic softmax entry point of the mcu PAL.
-// The operation is not implemented for this platform: builds with assertions enabled stop on
-// the assert below, and every argument is otherwise unused.
-template <typename T>
-static inline void Softmax(const tflite::SoftmaxParams &params,
- const tflite::RuntimeShape &input_shape, const T *input_data,
- const tflite::RuntimeShape &output_shape, T *output_data)
-{
- // MARK: At this moment this operation is not supported on mcu
- assert(false && "Softmax NYI");
- (void)params;
- (void)input_shape;
- (void)input_data;
- (void)output_shape;
- (void)output_data;
-}
-} // namespace luci_interpreter_pal
-
-#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
+++ /dev/null
-# Looks up the four source packages this PAL needs (TensorFlow, gemmlowp, Eigen, Ruy), all
-# pinned to version 2.6.0. PAL_INITIALIZED is set to TRUE only when every package was found;
-# otherwise a status message is printed and return() is invoked.
-# NOTE: this is a macro, so return() acts on the scope that invoked the macro.
-macro(initialize_pal)
- nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
- nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
- nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
- nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
-
- if (NOT TensorFlowSource_FOUND)
- message(STATUS "Skipping luci-interpreter: TensorFlow not found")
- return()
- endif ()
-
- if (NOT TensorFlowGEMMLowpSource_FOUND)
- message(STATUS "Skipping luci-interpreter: gemmlowp not found")
- return()
- endif ()
-
- if (NOT TensorFlowEigenSource_FOUND)
- message(STATUS "Skipping luci-interpreter: Eigen not found")
- return()
- endif ()
-
- if (NOT TensorFlowRuySource_FOUND)
- message(STATUS "Skipping luci-interpreter: Ruy not found")
- return()
- endif ()
- #find_package(Threads REQUIRED)
-
- # Reached only when none of the early returns above fired.
- set(PAL_INITIALIZED TRUE)
-endmacro()
-
-# Adds the PAL include directories to ${TGT} and links it against the static helper library
-# luci_interpreter_mcu_pal, which is built from three TensorFlow Lite source files.
-macro(add_pal_to_target TGT)
- target_include_directories(${TGT} PRIVATE "${PAL}")
- target_include_directories(${TGT} PRIVATE
- "${TensorFlowRuySource_DIR}"
- "${TensorFlowGEMMLowpSource_DIR}"
- "${TensorFlowEigenSource_DIR}"
- "${TensorFlowSource_DIR}")
- target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
-
- # TODO put it back, I changed my mind.
- # instead add sources with visitors in this library
- set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
- ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
- ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
-
- # Create the helper library only once: add_library() fails when a target with the same
- # name already exists, which would happen if this macro were invoked for a second target.
- if (NOT TARGET luci_interpreter_mcu_pal)
- add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
- set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
- target_include_directories(luci_interpreter_mcu_pal PRIVATE
- "${TensorFlowRuySource_DIR}"
- "${TensorFlowGEMMLowpSource_DIR}"
- "${TensorFlowEigenSource_DIR}"
- "${TensorFlowSource_DIR}"
- )
- endif ()
-
- target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal)
- #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal)
-endmacro()
+++ /dev/null
-require(luci)
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/BuddyMemoryManager.h"
-
-namespace luci_interpreter
-{
-
-// Sets up the manager over the caller-provided buffer starting at `memory_start`.
-// The managed size is reduced to 1 << lowerLog2(memSize) bytes (presumably the largest power
-// of two not exceeding memSize -- confirm against lowerLog2). The whole region starts out as
-// one free block.
-BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize)
-{
- int32_t p = lowerLog2(memSize);
-
- // We assume that the requested size of memory does not exceed 4 GB
- assert(p < 32);
- memSize = 1 << p;
-
- // The Block header lives at the very start of the buffer; `size` is what remains after it.
- _start_block = reinterpret_cast<Block *>(memory_start);
- _start_block->size = memSize - sizeof(Block);
- _start_block->is_free = true;
- _start_block->self = _start_block;
- _num_blocks = 0;
- // Remembered so that release_memory() can tell when a block spans the whole region again.
- _size = _start_block->size;
-
- for (auto &_free_block : _free_blocks)
- _free_block = nullptr;
-
- addToBlocks(_start_block, p);
-}
-
-// Reserves a block for `tensor` and points the tensor's data buffer at the bytes that
-// directly follow the block header.
-void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
-{
- const size_t element_size = getDataTypeSize(tensor.element_type());
- const int32_t num_elements = tensor.shape().num_elements();
- auto size = num_elements * element_size;
- // Every allocation also has to hold its Block header.
- auto footprint = size + sizeof(Block);
- // One level above lowerLog2(footprint) unless footprint is already a power of two.
- auto l = (footprint & (footprint - 1)) == 0
- ? lowerLog2(footprint)
- : lowerLog2(footprint) + 1; // check footprint is pow_of_2
-
- // Walk upwards until a level with a free block is found.
- while (l < 32 && !_free_blocks[l])
- l++;
-
- // NOTE(review): running out of memory is detected only by this assert; when assertions are
- // compiled out, _free_blocks[l] below is read with l == 32 -- confirm the array bounds.
- assert(l < 32);
-
- Block *tmp;
- tmp = _free_blocks[l];
- removeFromBlocks(tmp, l);
-
- // Keep halving while half of the block (header included) still fits payload plus header.
- while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block))
- {
- divideBlock(tmp, l);
- l--;
- }
-
- tmp->is_free = false;
- // Self pointer is checked by release_memory() before the block is returned.
- tmp->self = tmp;
- _num_blocks++;
-
- // User data starts right behind the header.
- auto *data = (uint8_t *)(tmp + 1);
- tensor.set_data_buffer(data);
-}
-
-// Returns the block backing `tensor` to the free lists, merges it as far as mergeBlock()
-// allows, and clears the tensor's data buffer.
-void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
-{
- auto data = tensor.data<void>();
- // The header sits sizeof(Block) bytes in front of the data pointer.
- auto *tmp = (Block *)((uint8_t *)data - sizeof(Block));
-
- // Sanity check: allocate_memory() stores the block's own address in `self`.
- assert(tmp->self == tmp);
-
- tmp->is_free = true;
- addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block)));
-
- // Merge repeatedly; stop when mergeBlock() yields null or the block covers the whole
- // managed region (_size).
- while (tmp)
- if (tmp->size == _size)
- break;
- else
- tmp = mergeBlock(tmp);
-
- _num_blocks--;
- tensor.set_data_buffer(nullptr);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/BuddyMemoryManager.h"
-#include <gtest/gtest.h>
-
-namespace luci_interpreter
-{
-namespace
-{
-
-using namespace testing;
-
-// Allocates two U8 tensors from one pool, checks that data written to each tensor reads back
-// unchanged, then releases both and checks that their data pointers are null.
-TEST(BuddyMemoryManager, basic)
-{
- // The pool is 200 bytes, but the manager is told to use only 130 of them.
- auto mem_pool = std::make_unique<uint8_t[]>(200);
- auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130);
- Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor");
-
- buddy_memory_manager->allocate_memory(first_tensor);
-
- uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8};
-
- // Round-trip 8 bytes through the first tensor.
- first_tensor.writeData(data_1, 8);
- uint8_t array_1[8];
- first_tensor.readData(array_1, 8);
- for (int i = 0; i < 8; i++)
- {
- EXPECT_EQ(data_1[i], array_1[i]);
- }
-
- // Second allocation happens while the first one is still live.
- Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor");
- buddy_memory_manager->allocate_memory(second_tensor);
-
- uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}};
- second_tensor.writeData(data_2, 10);
-
- uint8_t array_2[2][5];
- second_tensor.readData(array_2, 10);
- for (int i = 0; i < 2; i++)
- {
- for (int j = 0; j < 5; j++)
- {
- EXPECT_EQ(data_2[i][j], array_2[i][j]);
- }
- }
-
- // Releasing must reset each tensor's data pointer.
- buddy_memory_manager->release_memory(first_tensor);
- EXPECT_EQ(first_tensor.data<void>(), nullptr);
-
- buddy_memory_manager->release_memory(second_tensor);
- EXPECT_EQ(second_tensor.data<void>(), nullptr);
-}
-
-} // namespace
-} // namespace luci_interpreter
+++ /dev/null
-# Pull in the PAL helper macros from the directory selected by LUCI_INTERPRETER_PAL_DIR.
-include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake")
-
-initialize_pal()
-
-# Skip the whole component when the PAL could not be initialized.
-if (NOT PAL_INITIALIZED)
- message("PAL Failed to initialize, skip luci-interpreter")
- return()
-endif()
-
-message(STATUS "LUCI INTERPRETER BEGIN")
-
-# Target names; LUCI_INTERPRETER_SUFFIX is appended to each of them.
-set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}")
-set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}")
-set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
-set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
-set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}")
-
-add_subdirectory(core)
-message(STATUS "LUCI INTERPRETER CORE")
-add_subdirectory(kernels)
-message(STATUS "LUCI INTERPRETER KERNELS")
-add_subdirectory(loader)
-message(STATUS "LUCI INTERPRETER LOADER")
-add_subdirectory(import)
-message(STATUS "LUCI INTERPRETER IMPORT")
-
-message(STATUS "LUCI INTERPRETER INITIALIZED")
-
-set(SOURCES
- "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h"
- Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp
- "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp
- "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp
- "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp)
-
-# LUCI_INTERPRETER_STATIC selects a static library; the default is a shared one.
-if (NOT LUCI_INTERPRETER_STATIC)
- add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES})
-else ()
- add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
-endif ()
-
-set(TEST_SOURCES BuddyMemoryManager.test.cpp)
-
-target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(${LUCI_INTERPRETER_BINARY}
- PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE}
- PRIVATE nncc_common)
-
-install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
-install(DIRECTORY include/ DESTINATION include
- FILES_MATCHING PATTERN "*.h")
-
-# Everything below is test-only.
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-nnas_find_package(GTest REQUIRED)
-
-GTest_AddTest(buddy_manager_test ${TEST_SOURCES})
-target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY})
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/Interpreter.h"
-#include "luci_interpreter/SimpleMemoryManager.h"
-
-#include "loader/ModuleLoader.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace
-{
-
-// EventNotifier implementation that fans each runtime event out to every registered
-// ExecutionObserver, mapping the runtime Tensor/Kernel to its node through RuntimeToIR.
-// Both members are references, so the referenced objects must outlive this notifier.
-class EventNotifierImpl final : public EventNotifier
-{
-public:
- EventNotifierImpl(const RuntimeToIR &runtime_to_ir,
- const std::vector<ExecutionObserver *> &observers)
- : _runtime_to_ir(runtime_to_ir), _observers(observers)
- {
- }
-
- // Reports a tensor write; `.at()` is used for the tensor-to-node lookup.
- void postTensorWrite(const Tensor *tensor) override
- {
- assert(tensor != nullptr);
- for (const auto &observer : _observers)
- {
- observer->postTensorWrite(_runtime_to_ir.tensor_to_node.at(tensor), tensor);
- }
- }
-
- // Reports that `kernel` is about to run.
- void preOperatorExecute(const Kernel *kernel) override
- {
- assert(kernel != nullptr);
- for (const auto &observer : _observers)
- {
- observer->preOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
- }
- }
-
- // Reports that `kernel` has finished running.
- void postOperatorExecute(const Kernel *kernel) override
- {
- assert(kernel != nullptr);
- for (const auto &observer : _observers)
- {
- observer->postOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
- }
- }
-
-private:
- const RuntimeToIR &_runtime_to_ir;
- const std::vector<ExecutionObserver *> &_observers;
-};
-
-} // namespace
-
-// Builds an interpreter for `module` using an internally owned SimpleMemoryManager.
-Interpreter::Interpreter(const luci::Module *module)
-{
- _runtime_to_ir = std::make_unique<RuntimeToIR>();
- // The notifier keeps references to *_runtime_to_ir and _observers.
- _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
- _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
-
- _default_memory_manager = std::make_unique<SimpleMemoryManager>();
-
- ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
- _default_memory_manager.get());
- loader.load();
-}
-
-// Builds an interpreter for `module` that uses the caller-supplied `memory_manager`.
-// The pointer is passed on to the loader as is; _default_memory_manager is not created here.
-Interpreter::Interpreter(const luci::Module *module,
- luci_interpreter::IMemoryManager *memory_manager)
-{
- assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead");
-
- _runtime_to_ir = std::make_unique<RuntimeToIR>();
- // The notifier keeps references to *_runtime_to_ir and _observers.
- _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
- _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
-
- ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
- memory_manager);
- loader.load();
-}
-
-Interpreter::~Interpreter() = default;
-
-// Passes `data`/`data_size` to writeData() of the input tensor selected by input_node->index().
-// Throws std::runtime_error when that slot holds a null tensor.
-// A null `data` pointer is accepted and results in no write.
-void Interpreter::writeInputTensor(const luci::CircleInput *input_node, const void *data,
- size_t data_size)
-{
- // NOTE(review): the index is used without a bounds check -- confirm callers guarantee it.
- Tensor *tensor = _runtime_module->getInputTensors()[input_node->index()];
- if (tensor == nullptr)
- {
- const std::string &name = input_node->name();
- throw std::runtime_error("Cannot find tensor for input node named \"" + name + "\".");
- }
- if (data != nullptr)
- tensor->writeData(data, data_size);
-}
-
-// Passes `data`/`data_size` to readData() of the output tensor selected by
-// output_node->index(). Throws std::runtime_error when that slot holds a null tensor.
-// A null `data` pointer is accepted and results in no read.
-void Interpreter::readOutputTensor(const luci::CircleOutput *output_node, void *data,
- size_t data_size)
-{
- // NOTE(review): the index is used without a bounds check -- confirm callers guarantee it.
- Tensor *tensor = _runtime_module->getOutputTensors()[output_node->index()];
- if (tensor == nullptr)
- {
- const std::string &name = output_node->name();
- throw std::runtime_error("Cannot find tensor for output node named \"" + name + "\".");
- }
- if (data != nullptr)
- tensor->readData(data, data_size);
-}
-
-void Interpreter::interpret() { _runtime_module->execute(); }
-
-// Registers `observer`; throws std::runtime_error if the same pointer is already registered.
-// NOTE(review): std::find is declared in <algorithm>, which this file does not include
-// directly -- it presumably arrives through another header; confirm.
-void Interpreter::attachObserver(ExecutionObserver *observer)
-{
- if (std::find(_observers.cbegin(), _observers.cend(), observer) != _observers.cend())
- throw std::runtime_error("Observer is already attached.");
- _observers.push_back(observer);
-}
-
-// Out-of-line defaults for ExecutionObserver: the destructor is defaulted and every
-// callback is an empty function.
-ExecutionObserver::~ExecutionObserver() = default;
-
-void ExecutionObserver::postTensorWrite(const luci::CircleNode *, const Tensor *) {}
-
-void ExecutionObserver::preOperatorExecute(const luci::CircleNode *) {}
-
-void ExecutionObserver::postOperatorExecute(const luci::CircleNode *) {}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/SimpleMemoryManager.h"
-
-namespace luci_interpreter
-{
-
-// Gives `tensor` a fresh heap buffer of num_elements * element_size bytes.
-// Does nothing for tensors that are not allocatable; a buffer the tensor already holds is
-// released first.
-void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
-{
- if (!tensor.is_allocatable())
- {
- return;
- }
- if (tensor.is_data_allocated())
- {
- release_memory(tensor);
- }
- const auto element_size = getDataTypeSize(tensor.element_type());
- const auto num_elements = tensor.shape().num_elements();
-
- // Paired with the delete[] in release_memory().
- auto *data = new uint8_t[num_elements * element_size];
- tensor.set_data_buffer(data);
-}
-
-// Frees the tensor's buffer when one is allocated, then clears the tensor's data pointer
-// in either case.
-void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
-{
- if (tensor.is_data_allocated())
- {
- delete[] tensor.data<uint8_t>();
- }
- tensor.set_data_buffer(nullptr);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/StaticMemoryManager.h"
-
-namespace luci_interpreter
-{
-
-// Points `tensor` at _buffer_ptr + tensor.get_offset(); no memory is allocated here.
-// Does nothing for tensors that are not allocatable.
-void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
-{
- if (!tensor.is_allocatable())
- {
- return;
- }
- int32_t offset = tensor.get_offset();
- // A negative offset is treated as a programming error.
- assert(offset >= 0);
- auto tensor_ptr = _buffer_ptr + offset;
- tensor.set_data_buffer(tensor_ptr);
-}
-
-// Only detaches the tensor from the buffer; nothing is freed.
-void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
-{
- tensor.set_data_buffer(nullptr);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-
-// Gives `tensor` a fresh heap buffer of num_elements * element_size bytes and records the
-// buffer in `allocations`. Does nothing for tensors that are not allocatable.
-void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
-{
- if (!tensor.is_allocatable())
- {
- return;
- }
- if (tensor.is_data_allocated())
- {
- // release_memory() only clears the pointer; the previous buffer stays in `allocations`.
- release_memory(tensor);
- }
- const auto element_size = getDataTypeSize(tensor.element_type());
- const auto num_elements = tensor.shape().num_elements();
-
- auto *data = new uint8_t[num_elements * element_size];
- allocations.push_back(data);
- tensor.set_data_buffer(data);
-}
-
-// Only clears the tensor's data pointer; the buffer itself is not freed here.
-// NOTE(review): presumably the buffers recorded in `allocations` are freed elsewhere
-// (e.g. in the destructor) -- confirm.
-void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
-{
- tensor.set_data_buffer(nullptr);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-# Sources of the interpreter core library (always built as a static library).
-set(SOURCES
- "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/DataType.h"
- "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/Tensor.h"
- EventNotifier.h
- Kernel.h
- KernelParams.h
- RuntimeGraph.h
- RuntimeGraph.cpp
- RuntimeModule.h
- Tensor.cpp)
-
-add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
-# Position-independent code is enabled unless NNCC_LIBRARY_NO_PIC is set.
-if (NOT NNCC_LIBRARY_NO_PIC)
- set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif(NOT NNCC_LIBRARY_NO_PIC)
-# Both directories are PUBLIC, so targets linking against the core inherit them.
-target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang)
-target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common)
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
-#define LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
-
-namespace luci_interpreter
-{
-
-// Forward declarations: this header only uses pointers to these types, and it includes no
-// header that declares them, so declare them here to keep the header self-contained.
-class Kernel;
-class Tensor;
-
-// Used at execution stage to tell the interpreter that the runtime state has changed in some way.
-// Pure interface: every callback must be overridden by the implementation.
-class EventNotifier
-{
-public:
- virtual ~EventNotifier() = default;
-
- // Called after a tensor has been written.
- virtual void postTensorWrite(const Tensor *tensor) = 0;
- // Called right before a kernel is executed.
- virtual void preOperatorExecute(const Kernel *kernel) = 0;
- // Called right after a kernel has been executed.
- virtual void postOperatorExecute(const Kernel *kernel) = 0;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_KERNEL_H
-#define LUCI_INTERPRETER_CORE_KERNEL_H
-
-#include "luci_interpreter/core/Tensor.h"
-
-#include <vector>
-
-namespace luci_interpreter
-{
-
-// Base class for all kernels.
-// Stores the input and output tensor pointers handed over at construction; the tensors
-// themselves are not owned by the kernel.
-class Kernel
-{
-protected:
- // Only derived classes can construct a kernel; both vectors are moved into the members.
- Kernel(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs)
- : _inputs(std::move(inputs)), _outputs(std::move(outputs))
- {
- }
-
-public:
- virtual ~Kernel() = default;
-
- const std::vector<const Tensor *> &getInputTensors() const { return _inputs; }
- const std::vector<Tensor *> &getOutputTensors() const { return _outputs; }
-
- // Configures the kernel.
- // This function is currently called once for each kernel during interpreter construction,
- // which makes it a convenient place for preparing (resizing) output tensors.
- virtual void configure() = 0;
-
- // Executes the kernel.
- virtual void execute() const = 0;
-
-protected:
- // NOTE Prefer not to use these in derived classes.
- const std::vector<const Tensor *> _inputs;
- const std::vector<Tensor *> _outputs;
-};
-
-// Base class for kernels with parameters.
-// Keeps an immutable copy of the `Params` value passed to the constructor.
-template <typename Params> class KernelWithParams : public Kernel
-{
-protected:
- // `params` is copied into the kernel; `inputs` and `outputs` are forwarded to Kernel.
- KernelWithParams(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
- const Params &params)
- : Kernel(std::move(inputs), std::move(outputs)), _params(params)
- {
- }
-
-public:
- // Read-only access to the stored parameters.
- const Params &params() const { return _params; }
-
-protected:
- const Params _params;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_KERNEL_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_KERNELPARAMS_H
-#define LUCI_INTERPRETER_CORE_KERNELPARAMS_H
-
-#include <luci/IR/AttrPadding.h>
-#include <luci/IR/AttrFusedActFunc.h>
-#include <luci/IR/AttrMirrorPadMode.h>
-#include <luci_interpreter/core/DataType.h>
-
-#include <cstdint>
-#include <vector>
-
-namespace luci_interpreter
-{
-
-// Inject commonly used types into `luci_interpreter` namespace for convenience.
-using Activation = luci::FusedActFunc;
-using Padding = luci::Padding;
-using MirrorPadMode = luci::MirrorPadMode;
-
-struct AddParams
-{
- Activation activation;
-};
-
-struct ArgMaxParams
-{
- DataType output_type;
-};
-
-struct BatchMatMulParams
-{
- bool adj_x;
- bool adj_y;
-};
-
-struct ConcatenationParams
-{
- int axis;
- Activation activation;
-};
-
-struct Conv2DParams
-{
- Padding padding;
- int32_t stride_height;
- int32_t stride_width;
- int32_t dilation_height_factor;
- int32_t dilation_width_factor;
- Activation activation;
-};
-
-struct DepthToSpaceParams
-{
- int block_size;
-};
-
-struct DepthwiseConv2DParams
-{
- Padding padding;
- int32_t depth_multiplier; // TODO Remove, as it can be calculated.
- int32_t stride_height;
- int32_t stride_width;
- int32_t dilation_height_factor;
- int32_t dilation_width_factor;
- Activation activation;
-};
-
-struct DivParams
-{
- Activation activation;
-};
-
-struct FullyConnectedParams
-{
- Activation activation;
- bool keep_num_dims = false;
-};
-
-struct GatherParams
-{
- int32_t axis;
- int32_t batch_dims;
-};
-
-struct InstanceNormParams
-{
- float epsilon;
- Activation activation;
-};
-
-struct L2NormParams
-{
- Activation activation;
-};
-
-struct LeakyReluParams
-{
- float alpha;
-};
-
-struct LocalResponseNormalizationParams
-{
- int32_t radius;
- float bias;
- float alpha;
- float beta;
-};
-
-struct MirrorPadParams
-{
- MirrorPadMode mode;
-};
-
-struct MulParams
-{
- Activation activation;
-};
-
-struct OneHotParams
-{
- int32_t axis;
-};
-
-struct PackParams
-{
- int32_t values_count;
- int32_t axis;
-};
-
-struct Pool2DParams
-{
- Padding padding;
- int32_t filter_height;
- int32_t filter_width;
- int32_t stride_height;
- int32_t stride_width;
- Activation activation;
-};
-
-struct ReducerParams
-{
- bool keep_dims;
-};
-
-struct ResizeBilinearParams
-{
- bool align_corners;
- bool half_pixel_centers;
-};
-
-struct ResizeNearestNeighborParams
-{
- bool align_corners;
- bool half_pixel_centers;
-};
-
-// Parameters of the Shape kernel.
-// NOTE(review): ArgMaxParams in this header spells its type field as plain `DataType`;
-// presumably that is the same type as loco::DataType -- confirm and consider unifying.
-struct ShapeParams
-{
- loco::DataType out_type;
-};
-
-struct SubParams
-{
- Activation activation;
-};
-
-struct SVDFParams
-{
- bool asymmetric_quantize_inputs;
- int32_t svdf_rank;
- Activation activation;
-};
-
-struct SpaceToDepthParams
-{
- int block_size;
-};
-
-struct SoftmaxParams
-{
- float beta;
-};
-
-struct StridedSliceParams
-{
- int32_t begin_mask;
- int32_t end_mask;
- int32_t ellipsis_mask;
- int32_t new_axis_mask;
- int32_t shrink_axis_mask;
-};
-
-struct SqueezeParams
-{
- std::vector<int32_t> squeeze_dims;
-};
-
-struct TransposeConvParams
-{
- Padding padding;
- int32_t stride_height;
- int32_t stride_width;
-};
-
-struct UnpackParams
-{
- int axis;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_KERNELPARAMS_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "core/RuntimeGraph.h"
-
-#include "core/RuntimeModule.h"
-
-#include <algorithm>
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-// Per-kernel schedule of tensor allocations and releases for one RuntimeGraph.
-// The plan has to be (re)built with build() whenever it has been invalidated.
-class RuntimeGraph::TensorAllocPlan
-{
- // _alloc_plan[i]: tensors to allocate for kernel i.
- std::vector<std::vector<Tensor *>> _alloc_plan;
- // _dealloc_plan[i]: tensors to release at slot i (build() creates one slot more than there
- // are kernels).
- std::vector<std::vector<Tensor *>> _dealloc_plan;
- bool _valid = false;
- // Not owned; used for every allocate/release call.
- IMemoryManager *_memory_manager;
-
-public:
- explicit TensorAllocPlan(IMemoryManager *memory_manager);
- void invalidate() { _valid = false; }
- bool isValid() const { return _valid; }
- void build(const RuntimeGraph &graph);
- void allocate(size_t kernel_index) const;
- void deallocate(size_t kernel_index) const;
-};
-
-RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager)
- : _memory_manager(memory_manager)
-{
-}
-
-// Computes, for every tensor produced by a kernel of `graph`, the kernel index at which it
-// is allocated and the slot at which it is released, and marks the plan valid.
-void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph)
-{
- invalidate();
- // Lifetime = (index of the producing kernel, index of the last kernel reading the tensor).
- using Lifetime = std::pair<size_t, size_t>;
- std::unordered_map<Tensor *, Lifetime> lifetimes;
- const size_t num_kernels = graph._kernels.size();
- for (size_t index = 0; index < num_kernels; ++index)
- {
- const auto &kernel = graph._kernels[index];
- for (const Tensor *tensor : kernel->getInputTensors())
- {
- // Only tensors already recorded as some kernel's output are tracked; others are skipped.
- auto nc_tensor = const_cast<Tensor *>(tensor);
- if (lifetimes.count(nc_tensor) > 0)
- lifetimes.at(nc_tensor).second = index;
- }
- for (Tensor *tensor : kernel->getOutputTensors())
- {
- // Each tensor is expected to be produced by exactly one kernel.
- assert(lifetimes.count(tensor) == 0);
- lifetimes[tensor] = Lifetime(index, index);
- }
- }
- // Graph outputs get the extra slot `num_kernels` as their release point; execute() only
- // calls deallocate() for real kernel indices, so that slot is not released there.
- for (const Tensor *tensor : graph.getOutputTensors())
- {
- auto nc_tensor = const_cast<Tensor *>(tensor);
- if (lifetimes.count(nc_tensor) > 0)
- lifetimes.at(nc_tensor).second = num_kernels;
- }
- _alloc_plan.assign(num_kernels, std::vector<Tensor *>());
- _dealloc_plan.assign(num_kernels + 1, std::vector<Tensor *>());
- for (const auto &item : lifetimes)
- {
- _alloc_plan[item.second.first].push_back(item.first);
- _dealloc_plan[item.second.second].push_back(item.first);
- }
- _valid = true;
-}
-
-// Allocates every tensor scheduled for kernel `kernel_index`.
-// The plan must be valid and the index within range (checked by assert only).
-void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const
-{
- assert(_valid && kernel_index < _alloc_plan.size());
- for (Tensor *tensor : _alloc_plan[kernel_index])
- {
- _memory_manager->allocate_memory(*tensor);
- }
-}
-
-// Releases every tensor scheduled for slot `kernel_index`.
-// The plan must be valid and the index within range (checked by assert only).
-void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const
-{
- assert(_valid && kernel_index < _dealloc_plan.size());
- for (Tensor *tensor : _dealloc_plan[kernel_index])
- {
- _memory_manager->release_memory(*tensor);
- }
-}
-
-// Stores the owning module and the memory manager (both as raw pointers) and creates an
-// allocation plan bound to the same memory manager.
-RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager)
- : _owning_module(owning_module), _memory_manager(memory_manager),
- _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager))
-{
-}
-
-// Releases, through the memory manager, every tensor that still has data allocated.
-RuntimeGraph::~RuntimeGraph()
-{
- for (auto &tensor : _tensors)
- {
- if (tensor->is_data_allocated())
- _memory_manager->release_memory(*tensor);
- }
-}
-
-// Takes ownership of `tensor` and returns a non-owning pointer to it.
-Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
-{
- assert(tensor != nullptr);
- _tensors.push_back(std::move(tensor));
- return _tensors.back().get();
-}
-
-// Replaces the list of graph input tensors; null entries are rejected by assert only.
-void RuntimeGraph::setInputTensors(const std::vector<Tensor *> &input_tensors)
-{
- assert(std::all_of(input_tensors.cbegin(), input_tensors.cend(),
- [](Tensor *tensor) { return tensor != nullptr; }));
- _input_tensors = input_tensors;
-}
-
-// Replaces the list of graph output tensors; null entries are rejected by assert only.
-void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
-{
- assert(std::all_of(output_tensors.cbegin(), output_tensors.cend(),
- [](Tensor *tensor) { return tensor != nullptr; }));
- _output_tensors = output_tensors;
-}
-
-// Allocates `tensor` immediately through the memory manager, outside of the allocation plan.
-void RuntimeGraph::configureAllocations(Tensor *tensor)
-{
- _memory_manager->allocate_memory(*tensor);
-}
-
-// Appends `kernel` to the execution order and takes ownership of it.
-void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
-{
- assert(kernel != nullptr);
- _kernels.push_back(std::move(kernel));
- // The kernel list changed, so the plan is rebuilt on the next execute().
- _tensor_alloc_plan->invalidate();
-}
-
-// Runs all kernels in insertion order. For each kernel: configure, allocate the tensors
-// planned for it, execute, notify observers, then release the tensors planned for its slot.
-void RuntimeGraph::execute() const
-{
- // Lazily (re)build the allocation plan if kernels were added since the last build.
- if (!_tensor_alloc_plan->isValid())
- _tensor_alloc_plan->build(*this);
-
- // May be null, in which case no notifications are sent.
- EventNotifier *event_notifier = _owning_module->getEventNotifier();
-
- // Notify the observers that the input tensors have changed.
- if (event_notifier != nullptr)
- {
- for (const Tensor *input_tensor : getInputTensors())
- {
- if (input_tensor->is_observable())
- event_notifier->postTensorWrite(input_tensor);
- }
- }
-
- for (size_t index = 0; index < _kernels.size(); ++index)
- {
- const auto &kernel = _kernels[index];
- if (event_notifier != nullptr)
- {
- event_notifier->preOperatorExecute(kernel.get());
- }
-
- // TODO The `configure` method should only be called if the outputs of an operator need to be
- // resized.
- kernel->configure();
-
- // Preallocate outputs in advance instead of relying on automatic allocation
- _tensor_alloc_plan->allocate(index);
-
- kernel->execute();
-
- if (event_notifier != nullptr)
- {
- event_notifier->postOperatorExecute(kernel.get());
- }
-
- // Report every observable output of this kernel as written.
- for (const Tensor *tensor : kernel->getOutputTensors())
- {
- if (event_notifier != nullptr && tensor->is_observable())
- {
- event_notifier->postTensorWrite(tensor);
- }
- }
- // Release the tensors whose last use was this kernel.
- _tensor_alloc_plan->deallocate(index);
- }
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
-#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
-
-#include "luci_interpreter/core/Tensor.h"
-#include "luci_interpreter/MemoryManager.h"
-#include "core/Kernel.h"
-
-#include <memory>
-#include <vector>
-
-namespace luci_interpreter
-{
-
-class RuntimeModule;
-
-// Owns the tensors and kernels of a single graph and executes the kernels in the order in which
-// they were added.
-class RuntimeGraph
-{
-private:
-  class TensorAllocPlan;
-  friend class TensorAllocPlan;
-
-public:
-  explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager);
-  ~RuntimeGraph();
-
-  // Takes ownership of `tensor`; the returned pointer is non-owning.
-  Tensor *addTensor(std::unique_ptr<Tensor> &&tensor);
-
-  // Both setters copy the given vector; entries must not be null.
-  void setInputTensors(const std::vector<Tensor *> &input_tensors);
-  void setOutputTensors(const std::vector<Tensor *> &output_tensors);
-
-  // Allocates the data buffer of `tensor` through the memory manager.
-  void configureAllocations(Tensor *tensor);
-
-  const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; }
-  const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; }
-
-  // Takes ownership of `kernel` and appends it to the execution sequence.
-  void addKernel(std::unique_ptr<Kernel> &&kernel);
-
-  void execute() const;
-
-private:
-  // Members are initialized in declaration order, so the two pointers are declared in the same
-  // order as they appear in the constructor's member-initializer list
-  // (_owning_module, _memory_manager). This avoids -Wreorder warnings.
-  RuntimeModule *_owning_module;
-  IMemoryManager *_memory_manager;
-  std::vector<std::unique_ptr<Tensor>> _tensors;
-  std::vector<Tensor *> _input_tensors;
-  std::vector<Tensor *> _output_tensors;
-
-  // Kernels in execution order.
-  std::vector<std::unique_ptr<Kernel>> _kernels;
-  // Tensors that are not used anymore after given op
-  std::unique_ptr<TensorAllocPlan> _tensor_alloc_plan;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
-#define LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
-
-#include "core/RuntimeGraph.h"
-#include "core/EventNotifier.h"
-#include "luci_interpreter/MemoryManager.h"
-
-#include <memory>
-#include <vector>
-
-namespace luci_interpreter
-{
-
-// Owns a list of runtime graphs; the first graph added acts as the main graph whose inputs,
-// outputs and execution are exposed by this class.
-class RuntimeModule
-{
-public:
-  explicit RuntimeModule(EventNotifier *event_notifier) : _event_notifier(event_notifier) {}
-
-  EventNotifier *getEventNotifier() const { return _event_notifier; }
-
-  // Creates a graph owned by this module and returns a non-owning pointer to it.
-  RuntimeGraph *addGraph(IMemoryManager *memory_manager)
-  {
-    _graphs.emplace_back(std::make_unique<RuntimeGraph>(this, memory_manager));
-    return _graphs.back().get();
-  }
-
-  const std::vector<Tensor *> &getInputTensors() const
-  {
-    const RuntimeGraph *main_graph = getMainGraph();
-    return main_graph->getInputTensors();
-  }
-
-  const std::vector<Tensor *> &getOutputTensors() const
-  {
-    const RuntimeGraph *main_graph = getMainGraph();
-    return main_graph->getOutputTensors();
-  }
-
-  void execute() const
-  {
-    const RuntimeGraph *main_graph = getMainGraph();
-    main_graph->execute();
-  }
-
-private:
-  // The main graph is the first one added; at least one graph must exist before this is called.
-  RuntimeGraph *getMainGraph() const { return _graphs.front().get(); }
-
-  EventNotifier *const _event_notifier;
-  std::vector<std::unique_ptr<RuntimeGraph>> _graphs;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci_interpreter/core/Tensor.h"
-
-#include <cstring>
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-// Stores the element type, shape, quantization parameters and name of the tensor.
-// `_data_allocated` starts out false.
-Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization,
-               std::string name)
-  : _element_type(element_type),
-    _shape(std::move(shape)),
-    _quantization(std::move(quantization)),
-    _name(std::move(name)),
-    _data_allocated(false)
-{
-}
-
-// Copies the tensor contents into the caller-provided buffer.
-//
-// data_ptr  - destination buffer; must not be null when there is something to copy.
-// data_size - size of the destination in bytes; must equal num_elements * element size.
-//
-// Throws std::invalid_argument when `data_size` does not match the tensor size.
-void Tensor::readData(void *data_ptr, size_t data_size) const
-{
-  const size_t element_size = getDataTypeSize(element_type());
-  const int32_t num_elements = shape().num_elements();
-  // Spell out the signed -> unsigned conversion that the comparison performs anyway.
-  if (data_size != static_cast<size_t>(num_elements) * element_size)
-  {
-    throw std::invalid_argument("Invalid data size.");
-  }
-  // An empty tensor has nothing to copy. Returning here also keeps possibly-null pointers away
-  // from std::memcpy, whose behavior is undefined for null arguments even with a zero count.
-  if (data_size == 0)
-  {
-    return;
-  }
-  assert(data_ptr != nullptr);
-  std::memcpy(data_ptr, data<void>(), data_size);
-}
-
-// Overwrites the tensor contents with the bytes of the caller-provided buffer.
-//
-// data_ptr  - source buffer; must not be null when there is something to copy.
-// data_size - size of the source in bytes; must equal num_elements * element size.
-//
-// Throws std::invalid_argument when `data_size` does not match the tensor size.
-void Tensor::writeData(const void *data_ptr, size_t data_size)
-{
-  const size_t element_size = getDataTypeSize(element_type());
-  const int32_t num_elements = shape().num_elements();
-  // Spell out the signed -> unsigned conversion that the comparison performs anyway.
-  if (data_size != static_cast<size_t>(num_elements) * element_size)
-  {
-    throw std::invalid_argument("Invalid data size.");
-  }
-  // An empty tensor has nothing to copy. Returning here also keeps possibly-null pointers away
-  // from std::memcpy, whose behavior is undefined for null arguments even with a zero count.
-  if (data_size == 0)
-  {
-    return;
-  }
-  assert(data_ptr != nullptr);
-  std::memcpy(data<void>(), data_ptr, data_size);
-}
-
-// Replaces the stored shape with `new_shape`; only the shape member is touched here.
-void Tensor::resize(const Shape &new_shape)
-{
-  _shape = new_shape;
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-# Registry header and its implementation
-set(SOURCES "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h")
-list(APPEND SOURCES GraphBuilderRegistry.cpp)
-
-# Every file below Nodes/ is a specific node builder
-file(GLOB_RECURSE NODES "Nodes/*")
-list(APPEND SOURCES ${NODES})
-
-add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES})
-target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import)
-
-# Build position-independent code unless NNCC_LIBRARY_NO_PIC is set
-if(NOT NNCC_LIBRARY_NO_PIC)
-  set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif()
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "luci_interpreter/GraphBuilderRegistry.h"
-#include "Nodes/CircleReferencingConst.h"
-
-namespace luci_interpreter
-{
-
-// Returns a GraphBuilderRegistry in which CircleReferencingConstNodeBuilder has been registered.
-std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying()
-{
-  auto registry = std::make_unique<luci::GraphBuilderRegistry>();
-
-  // redefine NodeBuilder of BUFFER type
-  registry->add(std::make_unique<CircleReferencingConstNodeBuilder>());
-
-  return registry;
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CircleReferencingConst.h"
-
-#include <vector>
-
-namespace
-{
-
-// Payload written into custom_options of the CircleReferencingConst node: the address of the
-// constant's bytes and how many bytes there are.
-struct ConstDataReference
-{
-  const uint8_t *data{nullptr};
-  uint32_t size{0};
-};
-
-} // namespace
-
-namespace luci_interpreter
-{
-using namespace luci;
-
-// Builds the node that represents the constant tensor `tensor_index`.
-//
-// Returns nullptr when the tensor is a variable, has neither shape nor buffer, is produced by
-// another operator, or has a non-zero element count but an empty buffer. Otherwise returns a
-// CircleCustomOut whose input is a CircleCustom node with custom code "CircleReferencingConst";
-// that node's custom options hold a ConstDataReference, i.e. the address and size of the buffer
-// data instead of a copy of it.
-// NOTE(review): because only the address is stored, the buffer presumably has to outlive the
-// created node - confirm against the consumer of "CircleReferencingConst".
-CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index,
-                                                     GraphBuilderContext *context) const
-{
-  assert(tensor_index >= 0);
-
-  const auto graph = context->graph();
-  const auto reader = context->reader();
-  const auto tensors = reader->tensors();
-  auto const const_tensor = tensors[tensor_index];
-  assert(const_tensor != nullptr);
-  if (const_tensor->is_variable())
-  {
-    // Variable tensors are not built here.
-    // NOTE(review): presumably a CircleVariable is created for them elsewhere - confirm.
-    return nullptr;
-  }
-
-  auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
-  auto const const_dims = wrap(const_tensor->shape()); // in NHWC
-  if (const_dims.empty() && buffer.empty())
-  {
-    // unknown shape tensor and scalar tensor
-    return nullptr;
-  }
-
-  // if tensor_index is used as output to some other operator, this is not a constant
-  auto tensoroutputs = context->tensoroutputs();
-  if (tensoroutputs->find(tensor_index))
-  {
-    // other operator output tensor
-    return nullptr;
-  }
-
-  // Element count is the product of all dimensions (1 for a rank-0 tensor).
-  uint32_t num_elements = 1;
-  for (uint32_t r = 0; r < const_dims.size(); ++r)
-  {
-    num_elements = num_elements * const_dims[r];
-  }
-
-  if (buffer.empty() && num_elements > 0)
-  {
-    // normal empty tensor
-    return nullptr;
-  }
-
-  // create CircleReferencingConst
-  auto custom_node = graph->nodes()->create<CircleCustom>(0, 1);
-  {
-    custom_node->custom_code("CircleReferencingConst");
-
-    copy_tensor_attributes(const_tensor, custom_node);
-    custom_node->shape_status(luci::ShapeStatus::VALID);
-
-    // custom options stores size of buffer and pointer's value to buffer's data
-    {
-      // The byte vector is sized to hold exactly one ConstDataReference, which is then written
-      // in place through a reinterpret_cast of the vector's storage.
-      std::vector<uint8_t> custom_options(sizeof(ConstDataReference));
-      {
-        auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data());
-        const_data_ref = {buffer.data(), buffer.size()};
-      }
-      custom_node->custom_options(custom_options);
-    }
-  }
-
-  // The single output of the CircleCustom node is represented by a CircleCustomOut node
-  auto out_node = graph->nodes()->create<CircleCustomOut>();
-  {
-    out_node->index(0);
-    out_node->input(custom_node);
-
-    copy_tensor_attributes(const_tensor, out_node);
-    out_node->shape_status(luci::ShapeStatus::VALID);
-  }
-
-  return out_node;
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
-#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
-
-#include <luci/Import/NodeBuilder.h>
-
-#include <luci/IR/Nodes/CircleConst.h>
-
-namespace luci_interpreter
-{
-using namespace luci;
-
-/**
- * @brief Node builder registered for NodeBuilderType::BUFFER tensors.
- *
- * Instead of copying constant data, build() creates a CircleCustom node that carries a pointer
- * to (and the size of) the constant data of a Tensor with buffer.
- */
-class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
-{
-public:
-  // Builds the node for tensor `tensor_index`, using the graph and reader provided by `ctx`.
-  CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
-};
-
-} // namespace luci_interpreter
-
-#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Add.h"
-
-#include "kernels/BinaryOpCommon.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/add.h>
-#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers the two inputs and the single output with the kernel base class.
-// (Restores the `&params` reference parameter that had been mis-decoded as the pilcrow
-// character.)
-Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
-  : KernelWithParams<AddParams>({input1, input2}, {output}, params)
-{
-}
-
-// Checks that both inputs and the output share one element type, applies the extra S16
-// quantization constraints, and resizes the output to the broadcast of the two input shapes.
-void Add::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
-
-  const bool is_s16 = input1()->element_type() == DataType::S16;
-  if (is_s16)
-  {
-    // S16 inputs need exactly one zero point each, and every zero point must be 0.
-    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
-                           input2()->zero_points().size() == 1);
-    LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
-                           output()->zero_point() == 0);
-  }
-
-  const auto broadcast_shape = calculateShapeForBroadcast(input1()->shape(), input2()->shape());
-  output()->resize(broadcast_shape);
-}
-
-// Dispatches to the evaluation routine that matches the element type of the first input.
-// Throws std::runtime_error for any other element type.
-void Add::execute() const
-{
-  const DataType type = input1()->element_type();
-
-  if (type == DataType::FLOAT32)
-    evalFloat();
-  else if (type == DataType::S64)
-    evalInteger<int64_t>();
-  else if (type == DataType::S32)
-    evalInteger<int32_t>();
-  else if (type == DataType::U8)
-    evalQuantized();
-  else if (type == DataType::S16)
-    evalQuantizedS16();
-  else
-    throw std::runtime_error("Unsupported type.");
-}
-
-// Float addition with the configured fused activation applied as an output clamp.
-// (Restores the `&params` argument that had been mis-decoded as the pilcrow character.)
-void Add::evalFloat() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<float>(params, _params.activation);
-
-  // ProcessBroadcastShapes fills the broadcast description in `params` and reports whether the
-  // two input shapes require broadcasting.
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastAdd4DSlow(
-      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
-      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()),
-                               getTensorShape(input2()), getTensorData<float>(input2()),
-                               getTensorShape(output()), getTensorData<float>(output()));
-  }
-}
-
-// Integer addition for element type T (instantiated from execute() for int32_t and int64_t)
-// with the configured fused activation applied as an output clamp.
-// (Restores the `&params` argument that had been mis-decoded as the pilcrow character.)
-template <typename T> void Add::evalInteger() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<T>(params, _params.activation);
-
-  // ProcessBroadcastShapes fills the broadcast description in `params` and reports whether the
-  // two input shapes require broadcasting.
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastAdd4DSlow(
-      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
-      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()),
-                               getTensorShape(input2()), getTensorData<T>(input2()),
-                               getTensorShape(output()), getTensorData<T>(output()));
-  }
-}
-
-// U8 quantized addition. Both inputs are rescaled to a common scale (twice the larger input
-// scale) after a fixed left shift, summed, and rescaled to the output scale.
-// (Restores the `&params` argument that had been mis-decoded as the pilcrow character.)
-void Add::evalQuantized() const
-{
-  const auto input1_scale = static_cast<double>(input1()->scale());
-  const auto input2_scale = static_cast<double>(input2()->scale());
-  const auto output_scale = static_cast<double>(output()->scale());
-
-  const int left_shift = 20;
-  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
-  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
-  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
-  // The output multiplier also undoes the left shift applied to the inputs.
-  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
-
-  // Convert each real multiplier into the fixed-point (multiplier, shift) pair used by the kernel.
-  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
-  int input1_shift{}, input2_shift{}, output_shift{};
-  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
-  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
-  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::ArithmeticParams params{};
-  params.left_shift = left_shift;
-  // The kernel expects inputs' zero points to be negated.
-  params.input1_offset = -input1()->zero_point(); // Note the '-'.
-  params.input1_multiplier = input1_multiplier;
-  params.input1_shift = input1_shift;
-  params.input2_offset = -input2()->zero_point(); // Note the '-'.
-  params.input2_multiplier = input2_multiplier;
-  params.input2_shift = input2_shift;
-  params.output_offset = output()->zero_point();
-  params.output_multiplier = output_multiplier;
-  params.output_shift = output_shift;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  // ProcessBroadcastShapes fills the broadcast description in `params` and reports whether the
-  // two input shapes require broadcasting.
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastAdd4DSlow(
-      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
-      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
-                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
-                               getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-}
-
-// S16 quantized addition. Unlike the U8 path, the per-element arithmetic is done by a local
-// lambda that is applied through BinaryOpBroadcastSlow. No zero-point offsets are applied here;
-// configure() checks that all S16 zero points are 0.
-void Add::evalQuantizedS16() const
-{
-  const auto input1_scale = static_cast<double>(input1()->scale());
-  const auto input2_scale = static_cast<double>(input2()->scale());
-  const auto output_scale = static_cast<double>(output()->scale());
-
-  // constexpr, so the lambda below can use it without capturing it.
-  constexpr int left_shift = 12;
-  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
-  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
-  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
-  // The output multiplier also undoes the left shift applied to the inputs.
-  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
-
-  // Convert each real multiplier into a fixed-point (multiplier, shift) pair.
-  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
-  int input1_shift{}, input2_shift{}, output_shift{};
-  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
-  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
-  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  // Per-element operation: shift both operands left, rescale each, add, rescale the sum to the
-  // output scale and clamp to the activation range.
-  auto fn = [input1_multiplier, input1_shift, //
-             input2_multiplier, input2_shift, //
-             output_multiplier, output_shift, //
-             activation_min, activation_max](int16_t input1_val, int16_t input2_val) {
-    const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift;
-    const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift;
-    const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
-      shifted_input1_val, input1_multiplier, input1_shift);
-    const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
-      shifted_input2_val, input2_multiplier, input2_shift);
-    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
-    const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
-      raw_sum, output_multiplier, output_shift);
-    const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
-    return static_cast<int16_t>(clamped_output);
-  };
-
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
-                        getTensorShape(input2()), getTensorData<int16_t>(input2()),
-                        getTensorShape(output()), getTensorData<int16_t>(output()), fn);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ArgMax.h"
-#include "kernels/Utils.h"
-#include "PALArgMax.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers the input and axis tensors and the single output with the kernel base class.
-// (Restores the `&params` reference parameter that had been mis-decoded as the pilcrow
-// character.)
-ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params)
-  : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
-{
-}
-
-// Validates the axis tensor and resizes the output to the input shape with the reduced axis
-// removed. All validation is done with debug-build assertions.
-void ArgMax::configure()
-{
-  assert(axis()->element_type() == DataType::S32 || axis()->element_type() == DataType::S64);
-  assert(input()->shape().num_dims() >= 1);
-  const Shape &input_shape = input()->shape();
-  const int num_dims = input_shape.num_dims();
-  Shape output_shape(num_dims - 1);
-
-  assert(axis()->shape().num_elements() == 1);
-  // Read the axis with the element width it is stored in; an S64 axis must not be reinterpreted
-  // as int32.
-  int axis_value = (axis()->element_type() == DataType::S64)
-                     ? static_cast<int>(getTensorData<int64_t>(axis())[0])
-                     : static_cast<int>(getTensorData<int32_t>(axis())[0]);
-
-  // If axis value is negative, then update by adding input_shape's num_dims.
-  // If updated value also negative, then assert.
-  if (axis_value < 0)
-    axis_value = axis_value + num_dims;
-  assert(axis_value >= 0);
-  // An axis beyond the rank would make the loop below copy all num_dims dimensions into an
-  // output shape that only has room for num_dims - 1.
-  assert(axis_value < num_dims);
-
-  // Copy every dimension except the reduced one.
-  int j = 0;
-  for (int i = 0; i < num_dims; i++)
-  {
-    if (i == axis_value)
-      continue;
-    output_shape.dim(j++) = input_shape.dim(i);
-  }
-
-  assert(output()->element_type() == _params.output_type);
-
-  output()->resize(output_shape);
-}
-
-// Dispatches on (axis type, output type, input type) to luci_interpreter_pal::ArgMinMax with a
-// std::greater comparator. Supported inputs are FLOAT32 and U8, supported outputs are S32 and
-// S64; anything else throws std::runtime_error. Any axis type other than S32 takes the int64_t
-// branch.
-void ArgMax::execute() const
-{
-
-// Helper macro: calls ArgMinMax for one concrete combination of element types.
-#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
-  luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
-                                  getTensorData<axis_type>(axis()), getTensorShape(output()), \
-                                  getTensorData<output_type>(output()), std::greater<data_type>())
-  // Axis stored as int32_t.
-  if (axis()->element_type() == DataType::S32)
-  {
-    switch (_params.output_type)
-    {
-      case DataType::S32:
-        switch (input()->element_type())
-        {
-          case DataType::FLOAT32:
-            TF_LITE_ARG_MAX(float, int32_t, int32_t);
-            break;
-          case DataType::U8:
-            TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t);
-            break;
-          default:
-            throw std::runtime_error("Unsupported input type.");
-        }
-        break;
-      case DataType::S64:
-        switch (input()->element_type())
-        {
-          case DataType::FLOAT32:
-            TF_LITE_ARG_MAX(float, int32_t, int64_t);
-            break;
-          case DataType::U8:
-            TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t);
-            break;
-          default:
-            throw std::runtime_error("Unsupported input type.");
-        }
-        break;
-      default:
-        throw std::runtime_error("Unsupported output type.");
-    }
-  }
-  // Every other axis type is read as int64_t.
-  else
-  {
-    switch (_params.output_type)
-    {
-      case DataType::S32:
-        switch (input()->element_type())
-        {
-          case DataType::FLOAT32:
-            TF_LITE_ARG_MAX(float, int64_t, int32_t);
-            break;
-          case DataType::U8:
-            TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t);
-            break;
-          default:
-            throw std::runtime_error("Unsupported input type.");
-        }
-        break;
-      case DataType::S64:
-        switch (input()->element_type())
-        {
-          case DataType::FLOAT32:
-            TF_LITE_ARG_MAX(float, int64_t, int64_t);
-            break;
-          case DataType::U8:
-            TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t);
-            break;
-          default:
-            throw std::runtime_error("Unsupported input type.");
-        }
-        break;
-      default:
-        throw std::runtime_error("Unsupported output type.");
-    }
-  }
-#undef TF_LITE_ARG_MAX
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/AveragePool2D.h"
-
-#include "kernels/Utils.h"
-
-#include "PALAveragePool2d.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Registers the input and the two outputs (the result tensor and a scratchpad tensor) with the
-// kernel base class.
-// (Restores the `&params` reference parameter that had been mis-decoded as the pilcrow
-// character.)
-AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
-                             const Pool2DParams &params)
-  : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params)
-{
-}
-
-// Validates the input/output pair, derives the output size and paddings from the pooling
-// parameters, resizes the output and sets up the scratchpad tensor.
-void AveragePool2D::configure()
-{
-  if (input()->element_type() != output()->element_type())
-  {
-    throw std::runtime_error("Input Tensor and Output Tensor Type must be same");
-  }
-  if (input()->shape().num_dims() != 4)
-  {
-    throw std::runtime_error("Input Tensor Shape must be 4-D");
-  }
-
-  // The four dimensions are read as (batches, height, width, depth).
-  const Shape &in_shape = input()->shape();
-  const int32_t batches = in_shape.dim(0);
-  const int32_t in_height = in_shape.dim(1);
-  const int32_t in_width = in_shape.dim(2);
-  const int32_t depth = in_shape.dim(3);
-
-  const int32_t out_height =
-    computeOutputSize(_params.padding, in_height, _params.filter_height, _params.stride_height);
-  const int32_t out_width =
-    computeOutputSize(_params.padding, in_width, _params.filter_width, _params.stride_width);
-
-  _padding_height =
-    computePadding(_params.stride_height, 1, in_height, _params.filter_height, out_height);
-  _padding_width =
-    computePadding(_params.stride_width, 1, in_width, _params.filter_width, out_width);
-
-  // Quantized types additionally require matching input/output quantization.
-  switch (input()->element_type())
-  {
-    case DataType::U8:
-    case DataType::S8:
-      LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
-      LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
-      break;
-    case DataType::S16:
-      LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
-      LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
-      break;
-    default:
-      break;
-  }
-
-  output()->resize({batches, out_height, out_width, depth});
-
-  Tensor *const scratch = getOutputTensors()[1];
-  luci_interpreter_pal::SetupScratchpadTensor(scratch, input()->element_type(),
-                                              getTensorShape(input()), getTensorShape(output()));
-}
-
-// Dispatches to the evaluation routine that matches the input element type.
-// Throws std::runtime_error for any other element type.
-void AveragePool2D::execute() const
-{
-  const DataType type = input()->element_type();
-
-  if (type == DataType::FLOAT32)
-    evalFloat();
-  else if (type == DataType::U8)
-    evalQuantized();
-  else if (type == DataType::S16)
-    evalSInt16();
-  else if (type == DataType::S8)
-    evalSInt8();
-  else
-    throw std::runtime_error("Unsupported type.");
-}
-
-// Float average pooling through the TFLite reference implementation.
-void AveragePool2D::evalFloat() const
-{
-  float act_min{};
-  float act_max{};
-  calculateActivationRange(_params.activation, &act_min, &act_max);
-
-  tflite::PoolParams pool_params{};
-  pool_params.stride_height = _params.stride_height;
-  pool_params.stride_width = _params.stride_width;
-  pool_params.filter_height = _params.filter_height;
-  pool_params.filter_width = _params.filter_width;
-  pool_params.padding_values.height = _padding_height;
-  pool_params.padding_values.width = _padding_width;
-  pool_params.float_activation_min = act_min;
-  pool_params.float_activation_max = act_max;
-
-  const auto in_shape = getTensorShape(input());
-  const auto out_shape = getTensorShape(output());
-  tflite::reference_ops::AveragePool(pool_params, in_shape, getTensorData<float>(input()),
-                                     out_shape, getTensorData<float>(output()));
-}
-
-// U8 average pooling through the TFLite reference implementation.
-void AveragePool2D::evalQuantized() const
-{
-  int32_t act_min{};
-  int32_t act_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &act_min, &act_max);
-
-  tflite::PoolParams pool_params{};
-  pool_params.stride_height = _params.stride_height;
-  pool_params.stride_width = _params.stride_width;
-  pool_params.filter_height = _params.filter_height;
-  pool_params.filter_width = _params.filter_width;
-  pool_params.padding_values.height = _padding_height;
-  pool_params.padding_values.width = _padding_width;
-  pool_params.quantized_activation_min = act_min;
-  pool_params.quantized_activation_max = act_max;
-
-  const auto in_shape = getTensorShape(input());
-  const auto out_shape = getTensorShape(output());
-  tflite::reference_ops::AveragePool(pool_params, in_shape, getTensorData<uint8_t>(input()),
-                                     out_shape, getTensorData<uint8_t>(output()));
-}
-
-// S8 average pooling through the platform abstraction layer, which also receives the
-// scratchpad tensor (second output) when that tensor is allocatable.
-void AveragePool2D::evalSInt8() const
-{
-  int32_t act_min{};
-  int32_t act_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &act_min, &act_max);
-
-  tflite::PoolParams pool_params{};
-  pool_params.stride_height = _params.stride_height;
-  pool_params.stride_width = _params.stride_width;
-  pool_params.filter_height = _params.filter_height;
-  pool_params.filter_width = _params.filter_width;
-  pool_params.padding_values.height = _padding_height;
-  pool_params.padding_values.width = _padding_width;
-  pool_params.quantized_activation_min = act_min;
-  pool_params.quantized_activation_max = act_max;
-
-  // A scratchpad that is not allocatable is passed on as a null data pointer.
-  auto scratch = getOutputTensors()[1];
-  int8_t *const scratch_data = scratch->is_allocatable() ? scratch->data<int8_t>() : nullptr;
-
-  luci_interpreter_pal::AveragePool<int8_t>(
-    pool_params, getTensorShape(input()), getTensorData<int8_t>(input()),
-    getTensorShape(output()), getTensorData<int8_t>(output()), getTensorShape(scratch),
-    scratch_data);
-}
-
-// S16 average pooling through the TFLite reference integer implementation.
-void AveragePool2D::evalSInt16() const
-{
-  int32_t act_min{};
-  int32_t act_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &act_min, &act_max);
-
-  tflite::PoolParams pool_params{};
-  pool_params.stride_height = _params.stride_height;
-  pool_params.stride_width = _params.stride_width;
-  pool_params.filter_height = _params.filter_height;
-  pool_params.filter_width = _params.filter_width;
-  pool_params.padding_values.height = _padding_height;
-  pool_params.padding_values.width = _padding_width;
-  pool_params.quantized_activation_min = act_min;
-  pool_params.quantized_activation_max = act_max;
-
-  const auto in_shape = getTensorShape(input());
-  const auto out_shape = getTensorShape(output());
-  tflite::reference_integer_ops::AveragePool(pool_params, in_shape,
-                                             getTensorData<int16_t>(input()), out_shape,
-                                             getTensorData<int16_t>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/BatchMatMul.h"
-#include "kernels/Utils.h"
-
-#include "PALBatchMatMul.h"
-
-#include <tensorflow/lite/kernels/internal/reference/transpose.h>
-
-#include <stdexcept>
-
-namespace
-{
-
-// Returns a copy of `shape` whose last two dimensions are exchanged.
-tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape)
-{
-  const int32_t rank = shape.DimensionsCount();
-  const int32_t row_axis = rank - 2;
-  const int32_t col_axis = rank - 1;
-
-  tflite::RuntimeShape result(shape);
-  result.SetDim(row_axis, shape.Dims(col_axis));
-  result.SetDim(col_axis, shape.Dims(row_axis));
-  return result;
-}
-
-} // namespace
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers x and y as inputs, and output plus the two temporaries (x_tmp, y_tmp) as outputs.
-BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp,
-                         Tensor *y_tmp, const BatchMatMulParams &params)
-  : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params)
-{
-}
-
-// Validates operand types and ranks, sets up the scratchpad tensors and resizes the output
-// to the broadcast batch dimensions followed by the matrix-product dimensions.
-// Throws std::runtime_error when either operand is not FLOAT32.
-void BatchMatMul::configure()
-{
- auto lhs = x();
- auto rhs = y();
- auto adj_x = params().adj_x;
- auto adj_y = params().adj_y;
-
- // TODO Support non-float types
- if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
- throw std::runtime_error("Unsupported type.");
-
- LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type());
-
- // Only ranks 2 to 4 are accepted for either operand.
- auto lhs_rank = lhs->shape().num_dims();
- auto rhs_rank = rhs->shape().num_dims();
- LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4);
- LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
-
- // Let the PAL configure the scratchpad tensors from the operand shapes.
- auto lhs_scratchpad = temp_lhs();
- auto rhs_scratchpad = temp_rhs();
- luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs),
- getTensorShape(rhs));
-
- auto output_rank = std::max(lhs_rank, rhs_rank);
-
- // Extend both operand shapes to output_rank so they can be compared index by index.
- auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs));
- auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
-
- // Ensure any batch dimensions obey broadcasting rules.
- for (int i = 0; i < output_rank - 2; ++i)
- {
- const int lhs_dim = extended_lhs_shape.Dims(i);
- const int rhs_dim = extended_rhs_shape.Dims(i);
- if (lhs_dim != rhs_dim)
- {
- if (lhs_dim != 1)
- {
- LUCI_INTERPRETER_CHECK(rhs_dim == 1);
- }
- }
- }
-
- // Ensure other dimensions work for matrix multiplication.
- // adj_x / adj_y select which of the last two dimensions is the contracted one.
- int accum_dim_lhs =
- adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1);
- int accum_dim_rhs =
- adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2);
- LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
-
- Shape output_shape(output_rank);
- // Fill in any broadcast dimensions.
- for (int i = 0; i < output_rank - 2; ++i)
- {
- const int lhs_dim = extended_lhs_shape.Dims(i);
- const int rhs_dim = extended_rhs_shape.Dims(i);
- int broadcast_dim = lhs_dim;
- if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
- {
- broadcast_dim = rhs_dim;
- }
- output_shape.dim(i) = broadcast_dim;
- }
- // Fill in the matmul dimensions.
- int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
- int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
-
- output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
- output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
-
- output()->resize(output_shape);
-}
-
-// Writes tensor_in into tensor_out with its last two dimensions transposed.
-// Only FLOAT32 is handled; any other element type throws std::runtime_error.
-void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out)
-{
-  const tflite::RuntimeShape in_shape(getTensorShape(tensor_in));
-  const int rank = in_shape.DimensionsCount();
-
-  // Identity permutation on the leading dimensions, swap on the trailing pair.
-  tflite::TransposeParams transpose_params;
-  transpose_params.perm_count = rank;
-  for (int axis = 0; axis < rank - 2; ++axis)
-    transpose_params.perm[axis] = axis;
-  transpose_params.perm[rank - 2] = rank - 1;
-  transpose_params.perm[rank - 1] = rank - 2;
-
-  tflite::RuntimeShape out_shape(in_shape);
-  out_shape.SetDim(rank - 1, in_shape.Dims(rank - 2));
-  out_shape.SetDim(rank - 2, in_shape.Dims(rank - 1));
-
-  if (tensor_in->element_type() != DataType::FLOAT32)
-    throw std::runtime_error("Only suppport fp32 BatchMatMul for now.");
-
-  tflite::reference_ops::Transpose(transpose_params, in_shape, getTensorData<float>(tensor_in),
-                                   out_shape, getTensorData<float>(tensor_out));
-}
-
-// Multiplies x by y through the PAL, transposing operands into the temporary tensors first
-// where the adj_x / adj_y flags require it. Throws std::runtime_error for non-FLOAT32 input.
-void BatchMatMul::execute() const
-{
-  const bool adj_x = params().adj_x;
-  const bool adj_y = params().adj_y;
-
-  auto lhs = x();
-  auto rhs = y();
-
-  // rhs is transposed unless adj_y is set; lhs is transposed only when adj_x is set.
-  if (!adj_y)
-    TransposeRowsColumns(rhs, temp_rhs());
-  if (adj_x)
-    TransposeRowsColumns(lhs, temp_lhs());
-
-  // Operands handed to the PAL: the temporary copy wherever a transpose happened above.
-  const auto rhs_operand = adj_y ? rhs : temp_rhs();
-  const auto lhs_operand = adj_x ? temp_lhs() : lhs;
-
-  auto lhs_dims = getTensorShape(lhs);
-  auto rhs_dims = getTensorShape(rhs);
-  const tflite::RuntimeShape rhs_shape = adj_y ? rhs_dims : SwapRowColumnDims(rhs_dims);
-  const tflite::RuntimeShape lhs_shape = adj_x ? lhs_dims : SwapRowColumnDims(lhs_dims);
-
-  if (x()->element_type() != DataType::FLOAT32)
-    throw std::runtime_error("Unsupported type.");
-
-  // Note the argument order: the rhs operand is passed first, then the lhs operand.
-  luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_operand), lhs_shape,
-                                    getTensorData<float>(lhs_operand), getTensorShape(output()),
-                                    getTensorData<float>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/BatchToSpaceND.h"
-#include "kernels/Utils.h"
-
-#include "PALBatchToSpaceND.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-namespace
-{
-// Inclusive bounds on the input rank accepted by BatchToSpaceND::configure().
-const int kInputMinDimensionNum = 3;
-const int kInputMaxDimensionNum = 4;
-} // namespace
-
-// Registers input, block_shape and crops as kernel inputs and output as the only output.
-BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
- Tensor *output)
- : Kernel({input, block_shape, crops}, {output})
-{
-}
-
-// Validates the input, block_shape and crops tensors and resizes the output.
-//
-// Output shape: the batch is the input batch divided by every block size; spatial dimension i
-// becomes input_dim * block_shape[i] - crops[i][0] - crops[i][1]; the last dimension is copied.
-// Any violated precondition is reported through LUCI_INTERPRETER_CHECK.
-void BatchToSpaceND::configure()
-{
-  const auto *block_shape_data = block_shape()->data<int32_t>();
-  const auto *crops_data = crops()->data<int32_t>();
-  const Shape &input_shape = input()->shape();
-  const int input_rank = input_shape.num_dims();
-
-  LUCI_INTERPRETER_CHECK(input_rank >= kInputMinDimensionNum);
-  LUCI_INTERPRETER_CHECK(input_rank <= kInputMaxDimensionNum);
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  // Every dimension except the first (batch) and the last is treated as spatial.
-  const int spatial_dims_num = input_rank - 2;
-
-  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
-  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
-
-  LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
-  LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
-  LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
-  for (int i = 0; i < spatial_dims_num * 2; ++i)
-  {
-    LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
-  }
-
-  Shape output_shape = Shape(input_rank);
-  int output_batch_size = input_shape.dim(0);
-  for (int i = 0; i < spatial_dims_num; ++i)
-  {
-    // A zero block size would make the modulo and division below undefined behaviour, and a
-    // negative one has no meaning, so reject both before using the value as a divisor.
-    LUCI_INTERPRETER_CHECK(block_shape_data[i] > 0);
-    LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
-    output_batch_size = output_batch_size / block_shape_data[i];
-    output_shape.dim(i + 1) =
-      input_shape.dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
-  }
-
-  output_shape.dim(0) = output_batch_size;
-  output_shape.dim(input_rank - 1) = input_shape.dim(input_rank - 1);
-  output()->resize(output_shape);
-}
-
-// Dispatches to the PAL BatchToSpaceND implementation by input element type.
-// FLOAT32 and U8 are handled; any other type throws std::runtime_error.
-// block_shape and crops are always read as int32 data.
-void BatchToSpaceND::execute() const
-{
- switch (input()->element_type())
- {
- case DataType::FLOAT32:
- luci_interpreter_pal::BatchToSpaceND(
- getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
- getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
- getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
- break;
- case DataType::U8:
- luci_interpreter_pal::BatchToSpaceND(
- getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
- getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
- getTensorData<int32_t>(crops()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-# Sources that are always part of the kernels library, independent of the registered kernels.
-set(SOURCES
- BinaryOpCommon.h
- Utils.h
- Utils.cpp
- "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h"
- ${LUCI_INTERPRETER_SOURCE_DIR}/TestMemoryManager.cpp
- "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h"
- ${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp)
-
-# First definition of REGISTER_KERNEL: add <NODE>.h and <NODE>.cpp to the library sources.
-macro(REGISTER_KERNEL NODE)
- list(APPEND SOURCES "${NODE}.h")
- list(APPEND SOURCES "${NODE}.cpp")
-endmacro(REGISTER_KERNEL)
-
-# NOTE(review): presumably this file calls REGISTER_KERNEL once per kernel - confirm.
-include(${KERNEL_REGISTER_FILE})
-
-# Static kernels library; built position-independent unless NNCC_LIBRARY_NO_PIC is set.
-add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
-if (NOT NNCC_LIBRARY_NO_PIC)
- set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif(NOT NNCC_LIBRARY_NO_PIC)
-target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
-
-target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
-target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common)
-
-add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
-
-# Everything below only applies to test builds.
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-nnas_find_package(GTest REQUIRED)
-
-# Redefine REGISTER_KERNEL so that including the register file a second time collects
-# <NODE>.test.cpp into TEST_SOURCES instead of extending SOURCES.
-macro(REGISTER_KERNEL NODE)
- list(APPEND TEST_SOURCES "${NODE}.test.cpp")
-endmacro(REGISTER_KERNEL)
-
-include(${KERNEL_REGISTER_FILE})
-
-list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
-
-# Test executable, linked against the kernels library built above.
-GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
-target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS})
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Cast.h"
-#include "kernels/Utils.h"
-
-namespace
-{
-
-using namespace luci_interpreter;
-using namespace luci_interpreter::kernels;
-
-// Converts elements_count values from in_data to OutT with static_cast, writing to out_data.
-template <typename InT, typename OutT>
-void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count)
-{
-  for (uint32_t idx = 0; idx < elements_count; ++idx)
-  {
-    out_data[idx] = static_cast<OutT>(in_data[idx]);
-  }
-}
-
-// Casts the typed input buffer into out_tensor, choosing the destination C++ type from the
-// tensor's element type. The number of converted elements is taken from out_tensor's shape.
-// Throws std::runtime_error for an output element type that is not listed below.
-template <typename InT> void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor)
-{
- auto const out_type = out_tensor->element_type();
- auto const elements_count = out_tensor->shape().num_elements();
-
- switch (out_type)
- {
- case loco::DataType::U8:
- cast_data(in_data, getTensorData<uint8_t>(out_tensor), elements_count);
- break;
- case loco::DataType::U16:
- cast_data(in_data, getTensorData<uint16_t>(out_tensor), elements_count);
- break;
- case loco::DataType::U32:
- cast_data(in_data, getTensorData<uint32_t>(out_tensor), elements_count);
- break;
- case loco::DataType::U64:
- cast_data(in_data, getTensorData<uint64_t>(out_tensor), elements_count);
- break;
- case loco::DataType::S8:
- cast_data(in_data, getTensorData<int8_t>(out_tensor), elements_count);
- break;
- case loco::DataType::S16:
- cast_data(in_data, getTensorData<int16_t>(out_tensor), elements_count);
- break;
- case loco::DataType::S32:
- cast_data(in_data, getTensorData<int32_t>(out_tensor), elements_count);
- break;
- case loco::DataType::S64:
- cast_data(in_data, getTensorData<int64_t>(out_tensor), elements_count);
- break;
- case loco::DataType::FLOAT32:
- cast_data(in_data, getTensorData<float>(out_tensor), elements_count);
- break;
- case loco::DataType::BOOL:
- cast_data(in_data, getTensorData<bool>(out_tensor), elements_count);
- break;
- default:
- throw std::runtime_error("Unsupported output type.");
- }
-}
-
-// Casts in_tensor into out_tensor. This level resolves the source C++ type from the input
-// element type; cast_from_pointer_to_tensor then resolves the destination type.
-// Throws std::runtime_error for an input element type that is not listed below.
-void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor)
-{
- auto in_type = in_tensor->element_type();
-
- switch (in_type)
- {
- case loco::DataType::U8:
- cast_from_pointer_to_tensor(getTensorData<uint8_t>(in_tensor), out_tensor);
- break;
- case loco::DataType::U16:
- cast_from_pointer_to_tensor(getTensorData<uint16_t>(in_tensor), out_tensor);
- break;
- case loco::DataType::U32:
- cast_from_pointer_to_tensor(getTensorData<uint32_t>(in_tensor), out_tensor);
- break;
- case loco::DataType::U64:
- cast_from_pointer_to_tensor(getTensorData<uint64_t>(in_tensor), out_tensor);
- break;
- case loco::DataType::S8:
- cast_from_pointer_to_tensor(getTensorData<int8_t>(in_tensor), out_tensor);
- break;
- case loco::DataType::S16:
- cast_from_pointer_to_tensor(getTensorData<int16_t>(in_tensor), out_tensor);
- break;
- case loco::DataType::S32:
- cast_from_pointer_to_tensor(getTensorData<int32_t>(in_tensor), out_tensor);
- break;
- case loco::DataType::S64:
- cast_from_pointer_to_tensor(getTensorData<int64_t>(in_tensor), out_tensor);
- break;
- case loco::DataType::FLOAT32:
- cast_from_pointer_to_tensor(getTensorData<float>(in_tensor), out_tensor);
- break;
- case loco::DataType::BOOL:
- cast_from_pointer_to_tensor(getTensorData<bool>(in_tensor), out_tensor);
- break;
- default:
- throw std::runtime_error("Unsupported input type.");
- }
-}
-
-} // namespace
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers input as the single kernel input and output as the single kernel output.
-Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-// Rejects tensors of Unknown element type and gives the output the input's shape.
-void Cast::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() != loco::DataType::Unknown);
-  LUCI_INTERPRETER_CHECK(output()->element_type() != loco::DataType::Unknown);
-
-  output()->resize(input()->shape());
-}
-
-// Converts every input element to the output element type.
-void Cast::execute() const
-{
- // configure() resizes the output to the input shape, so the element counts should match.
- assert(input()->shape().num_elements() == output()->shape().num_elements());
-
- cast_from_tensor_to_tensor(input(), output());
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Concatenation.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/concatenation.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Takes ownership of the input list (moved into the base class) and registers output as the
-// only output tensor.
-Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
-                             const ConcatenationParams &params)
-  : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
-{
-}
-
-// Validates the inputs and resizes the output: same shape as the first input, except that the
-// concatenation axis holds the sum of all inputs' sizes along that axis.
-// Requires at least one input, no fused activation, an axis within the first input's rank
-// (negative values count from the end), and inputs that agree in type, rank and every
-// non-axis dimension.
-void Concatenation::configure()
-{
- const int num_inputs = _inputs.size();
- LUCI_INTERPRETER_CHECK(num_inputs > 0);
- const Tensor *t0 = _inputs[0];
-
- // TODO: Support concat with fused activation function
- LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::NONE);
-
- // Normalize a negative axis against the rank of the first input.
- int axis = _params.axis;
- if (axis < 0)
- axis += t0->shape().num_dims();
- LUCI_INTERPRETER_CHECK(axis >= 0 && axis < t0->shape().num_dims());
-
- // Accumulate the output size along the axis while checking the remaining inputs against t0.
- int32_t sum_axis = t0->shape().dim(axis);
- for (int i = 1; i < num_inputs; ++i)
- {
- const Tensor *tensor = _inputs[i];
- LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
- LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
- for (int d = 0; d < t0->shape().num_dims(); ++d)
- {
- if (d == axis)
- {
- sum_axis += tensor->shape().dim(axis);
- }
- else
- {
- LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
- }
- }
- }
-
- Shape output_shape = t0->shape();
- output_shape.dim(axis) = sum_axis;
-
- // If input tensors are INT8 type then quantization parameters of all input tensors and the output
- // should be the same
- for (auto current_tensor : _inputs)
- {
- if (current_tensor->element_type() == DataType::S8)
- {
- LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() ==
- output()->quantized_dimension());
-
- LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() ==
- current_tensor->scales().size());
- LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points());
- LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales());
- }
- }
- output()->resize(output_shape);
-}
-
-// Dispatches on the element type of the first input. U8 goes through the quantized path;
-// FLOAT32, S8, S32 and S64 use the generic path. Other types throw std::runtime_error.
-void Concatenation::execute() const
-{
- switch (_inputs[0]->element_type())
- {
- case DataType::FLOAT32:
- evalGeneric<float>();
- break;
- case DataType::U8:
- evalQuantized();
- break;
- case DataType::S8:
- evalGeneric<int8_t>();
- break;
- case DataType::S32:
- evalGeneric<int32_t>();
- break;
- case DataType::S64:
- evalGeneric<int64_t>();
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-// Concatenates the inputs as plain elements of type T with the TFLite reference kernel.
-template <typename T> void Concatenation::evalGeneric() const
-{
-  // A negative axis counts back from the output rank.
-  const int axis = (_params.axis < 0) ? _params.axis + output()->shape().num_dims() : _params.axis;
-
-  VectorOfTensors<T, true> input_views(_inputs);
-
-  tflite::ConcatenationParams op_params{};
-  op_params.axis = axis;
-  op_params.inputs_count = _inputs.size();
-
-  tflite::reference_ops::Concatenation(op_params, input_views.shapes(), input_views.data(),
-                                       getTensorShape(output()), getTensorData<T>(output()));
-}
-
-// Concatenates uint8 inputs with the TFLite reference ConcatenationWithScaling kernel, passing
-// the inputs' and the output's zero point and scale.
-void Concatenation::evalQuantized() const
-{
-  // A negative axis counts back from the output rank.
-  const int axis = (_params.axis < 0) ? _params.axis + output()->shape().num_dims() : _params.axis;
-
-  VectorOfQuantizedTensors<true> input_views(_inputs);
-
-  tflite::ConcatenationParams op_params{};
-  op_params.axis = axis;
-  op_params.inputs_count = _inputs.size();
-  op_params.input_zeropoint = input_views.zero_point();
-  op_params.input_scale = input_views.scale();
-  op_params.output_zeropoint = output()->zero_point();
-  op_params.output_scale = output()->scale();
-
-  tflite::reference_ops::ConcatenationWithScaling(op_params, input_views.shapes(),
-                                                  input_views.data(), getTensorShape(output()),
-                                                  getTensorData<uint8_t>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_CONCATENATION_H
-#define LUCI_INTERPRETER_KERNELS_CONCATENATION_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Kernel that concatenates its input tensors along the axis given in ConcatenationParams.
-class Concatenation : public KernelWithParams<ConcatenationParams>
-{
-public:
-  Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
-                const ConcatenationParams &params);
-
-  // Returns the index-th input tensor; the index is not range-checked.
-  const Tensor *input(int index) const { return _inputs[index]; }
-  Tensor *output() const { return _outputs[0]; }
-
-  void configure() override;
-  void execute() const override;
-
-private:
-  // Element-wise concatenation for element type T.
-  template <typename T> void evalGeneric() const;
-  // Concatenation path for quantized tensors.
-  void evalQuantized() const;
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_CONCATENATION_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Concatenation.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-// Fixture that gives every test a fresh TestMemoryManager for tensor allocation.
-class ConcatenationTest : public ::testing::Test
-{
-protected:
- void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
-
- std::unique_ptr<IMemoryManager> _memory_manager;
-};
-
-// Concatenates two 2x3 float tensors along axes 0, -2, 1 and -1 and checks the flattened
-// result; each negative axis must give the same output as its non-negative counterpart.
-TEST_F(ConcatenationTest, Float)
-{
- std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
- std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor =
- makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
- Tensor input2_tensor =
- makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
- ConcatenationParams params{};
-
- // Try different 'axis' and expect different results.
- {
- params.axis = 0;
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- kernel.configure();
- for (auto t : kernel.getOutputTensors())
- {
- _memory_manager->allocate_memory(*t);
- }
- kernel.execute();
-
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
- }
- {
- params.axis = -2; // Same as '0'.
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
- }
- {
- params.axis = 1;
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
- }
- {
- params.axis = -1; // Same as '1'.
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- EXPECT_THAT(extractTensorData<float>(output_tensor),
- FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
- }
-}
-
-// Negative test: configure() must throw when the kernel has no input tensors.
-TEST_F(ConcatenationTest, Input_Number_Check_NEG)
-{
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
- ConcatenationParams params{};
-
- params.axis = -1;
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({}, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-// Negative test: axis -3 is out of range for rank-2 inputs, so configure() must throw.
-TEST_F(ConcatenationTest, Invalid_Axis_NEG)
-{
- std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
- std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor =
- makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
- Tensor input2_tensor =
- makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
- ConcatenationParams params{};
-
- params.axis = -3;
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-// Negative test: a FLOAT32 input combined with a U8 input must make configure() throw.
-TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG)
-{
- std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
- std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor =
- makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
- Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
- ConcatenationParams params{};
-
- params.axis = -1;
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-// Negative test: inputs of rank 2 and rank 3 must make configure() throw.
-TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
-{
- std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
- std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor =
- makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
- Tensor input2_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
- ConcatenationParams params{};
-
- params.axis = -1;
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-// Negative test: the inputs (2x3 and 3x3) differ in a non-axis dimension while concatenating
-// along the last axis, so configure() must throw.
-TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
-{
- std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
- std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15};
- Tensor input1_tensor =
- makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
- Tensor input2_tensor =
- makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
- ConcatenationParams params{};
-
- params.axis = -1;
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-// Negative test: a U8 input combined with an S8 input must make configure() throw.
-TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG)
-{
- std::vector<uint8_t> input1_data{1, 2, 3, 4};
- std::vector<int8_t> input2_data{5, 6, 7, 8};
- Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get());
- Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::S8);
- ConcatenationParams params{};
-
- params.axis = -1;
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-// Negative test: the S8 inputs are built with three scales/zero points on dimension 3 while the
-// output is built from a single scale and zero point, so configure() must throw.
-TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG)
-{
- std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
- std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
- int quantized_dimension = 3;
- std::vector<float> scales{0.1, 0.2, 0.3};
- std::vector<int32_t> zero_points{1, -1, 1};
-
- Tensor input1_tensor = makeInputTensor<DataType::S8>(
- {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get());
- Tensor input2_tensor = makeInputTensor<DataType::S8>(
- {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0));
- ConcatenationParams params{};
-
- params.axis = -1;
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-// Negative test: the second S8 input uses a different zero point from the output, so
-// configure() must throw.
-TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG)
-{
- std::vector<float> input1_data{1, 2, 3, 4};
- std::vector<float> input2_data{5, 6, 7, 8};
- float scale = 0.1;
- int32_t zero_point_1 = 1;
- int32_t zero_point_2 = -1;
-
- Tensor input1_tensor =
- makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get());
- Tensor input2_tensor =
- makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get());
-
- Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1);
- ConcatenationParams params{};
-
- params.axis = -1;
- params.activation = luci::FusedActFunc::NONE;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-// TODO: Remove this test when concat w/ fused_activation is supported
-// Negative test: a fused RELU activation must make configure() throw.
-TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
-{
- std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
- std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor =
- makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
- Tensor input2_tensor =
- makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
- ConcatenationParams params{};
-
- params.axis = 1;
- params.activation = luci::FusedActFunc::RELU;
-
- Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Conv2D.h"
-
-#include "kernels/Utils.h"
-
-#include "PALConv2d.h"
-
-#include <stdexcept>
-#include <thread>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers input, filter and bias as kernel inputs, and output plus scratchpad as outputs.
-Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
-               Tensor *scratchpad, const Conv2DParams &params)
-  : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params)
-{
-}
-
-void Conv2D::configure()
-{
- // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
- // | input filter bias output |
- // ----+---------------------------+
- // (1) | float float float float |
- // (2) | float int8 float float | hybrid
- // (3) | uint8 uint8 int32 uint8 | quantized
- // (4) | int8 int8 int32 int8 | quantized per channel
- //
- // We only support (1), (3) and (4) for now, and additionally the following:
- // | input filter bias output |
- // ----+---------------------------+
- // (5) | int16 int16 int64 int16 |
- //
- if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
- {
- LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
- }
- else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
- {
- LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
- }
- else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
- {
- LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
- LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
- LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
- static_cast<size_t>(filter()->shape().dim(0)));
- for (auto zerop : filter()->zero_points())
- {
- LUCI_INTERPRETER_CHECK(zerop == 0);
- }
- }
- else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
- {
- LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
- }
- else
- {
- throw std::runtime_error("Unsupported type.");
- }
- LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
-
- const Shape &input_shape = input()->shape();
- const Shape &filter_shape = filter()->shape();
- LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
-
- const int32_t batches = input_shape.dim(0);
- const int32_t input_height = input_shape.dim(1);
- const int32_t input_width = input_shape.dim(2);
- const int32_t output_depth = filter_shape.dim(0);
- const int32_t filter_height = filter_shape.dim(1);
- const int32_t filter_width = filter_shape.dim(2);
- LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3));
-
- LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
- bias()->shape().dim(0) == output_depth));
-
- const int32_t output_height =
- computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
- _params.dilation_height_factor);
- const int32_t output_width =
- computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
- _params.dilation_width_factor);
-
- _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
- input_height, filter_height, output_height);
- _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
- filter_width, output_width);
-
- output()->resize({batches, output_height, output_width, output_depth});
-
- // Allocate tensor for scratchpad, if needed.
- tflite::ConvParams params{};
- params.padding_values.height = _padding_height;
- params.padding_values.width = _padding_width;
- params.stride_height = _params.stride_height;
- params.stride_width = _params.stride_width;
- params.dilation_height_factor = _params.dilation_height_factor;
- params.dilation_width_factor = _params.dilation_width_factor;
- auto scratchpad = getOutputTensors()[1];
- luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params,
- getTensorShape(input()), getTensorShape(filter()),
- getTensorShape(output()));
-
- switch (_params.activation)
- {
- case Activation::NONE:
- case Activation::RELU:
- case Activation::RELU6:
- case Activation::RELU_N1_TO_1:
- break;
- default:
- throw std::runtime_error("Unsupported fused activation");
- }
-}
-
-void Conv2D::execute() const
-{
- switch (input()->element_type())
- {
- case DataType::FLOAT32:
- if (filter()->element_type() == DataType::FLOAT32)
- {
- evalFloat();
- break;
- }
- throw std::runtime_error("Unsupported type.");
- case DataType::U8:
- if (filter()->scales().size() == 1)
- {
- evalQuantized();
- }
- else if (filter()->scales().size() > 1)
- {
- LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
- LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
- static_cast<size_t>(filter()->shape().dim(0)));
- evalQuantizedPerChannel();
- }
- break;
- case DataType::S8:
- evalQuantizedS8PerChannel();
- break;
- case DataType::S16:
- evalQuantizedS16();
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-void Conv2D::evalFloat() const
-{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
- tflite::ConvParams params{};
- params.padding_values.height = _padding_height;
- params.padding_values.width = _padding_width;
- params.stride_height = _params.stride_height;
- params.stride_width = _params.stride_width;
- params.dilation_height_factor = _params.dilation_height_factor;
- params.dilation_width_factor = _params.dilation_width_factor;
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
-
- auto scratchpad = getOutputTensors()[1];
- float *scratchpad_data = nullptr;
- if (scratchpad->is_allocatable())
- scratchpad_data = scratchpad->data<float>();
-
- luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
- getTensorShape(filter()), getTensorData<float>(filter()),
- getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()),
- getTensorShape(scratchpad), scratchpad_data);
-}
-
-void Conv2D::evalQuantized() const
-{
- const auto input_scale = static_cast<double>(input()->scale());
- const auto filter_scale = static_cast<double>(filter()->scale());
- const auto output_scale = static_cast<double>(output()->scale());
-
- const double real_multiplier = input_scale * filter_scale / output_scale;
- int32_t output_multiplier{};
- int output_shift{};
- quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
-
- int32_t activation_min{};
- int32_t activation_max{};
- calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
- tflite::ConvParams params{};
- params.padding_values.height = _padding_height;
- params.padding_values.width = _padding_width;
- params.stride_height = _params.stride_height;
- params.stride_width = _params.stride_width;
- params.dilation_height_factor = _params.dilation_height_factor;
- params.dilation_width_factor = _params.dilation_width_factor;
- // The kernel expects input and filter zero points to be negated.
- params.input_offset = -input()->zero_point(); // Note the '-'.
- params.weights_offset = -filter()->zero_point(); // Note the '-'.
- params.output_offset = output()->zero_point();
- params.output_multiplier = output_multiplier;
- params.output_shift = output_shift;
- params.quantized_activation_min = activation_min;
- params.quantized_activation_max = activation_max;
-
- auto scratchpad = getOutputTensors()[1];
- luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(filter()), getTensorData<uint8_t>(filter()),
- getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<uint8_t>(output()),
- getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad));
-}
-
-void Conv2D::evalQuantizedPerChannel() const
-{
- const auto *input_data = getTensorData<uint8_t>(input());
- const auto *filter_data = getTensorData<uint8_t>(filter());
- const auto *bias_data = getTensorData<int32_t>(bias());
- auto *output_data = getTensorData<uint8_t>(output());
-
- const Shape &input_shape = input()->shape();
- const Shape &filter_shape = filter()->shape();
- const Shape &output_shape = output()->shape();
-
- const int32_t batches = input_shape.dim(0);
- const int32_t input_height = input_shape.dim(1);
- const int32_t input_width = input_shape.dim(2);
- const int32_t input_depth = input_shape.dim(3);
- const int32_t output_depth = filter_shape.dim(0);
- const int32_t filter_height = filter_shape.dim(1);
- const int32_t filter_width = filter_shape.dim(2);
- const int32_t output_height = output_shape.dim(1);
- const int32_t output_width = output_shape.dim(2);
-
- const int32_t stride_height = _params.stride_height;
- const int32_t stride_width = _params.stride_width;
- const int32_t dilation_height_factor = _params.dilation_height_factor;
- const int32_t dilation_width_factor = _params.dilation_width_factor;
-
- int32_t activation_min{};
- int32_t activation_max{};
- calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
- const std::vector<double> effective_output_scale =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
- const std::vector<ChannelQuantMultipliers> multipliers_raw =
- quantizeMultipliers(effective_output_scale);
- BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw);
-
- for (int32_t batch = 0; batch < batches; ++batch)
- {
- for (int32_t out_y = 0; out_y < output_height; ++out_y)
- {
- for (int32_t out_x = 0; out_x < output_width; ++out_x)
- {
- for (int32_t out_c = 0; out_c < output_depth; ++out_c)
- {
- const int32_t in_y_origin = out_y * stride_height - _padding_height;
- const int32_t in_x_origin = out_x * stride_width - _padding_width;
- int32_t acc = 0;
- for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
- {
- for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
- {
- const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
- const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
- if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
- {
- for (int32_t in_c = 0; in_c < input_depth; ++in_c)
- {
- const uint8_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
- const uint8_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
- acc += static_cast<int32_t>(input_val - input()->zero_point()) *
- static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
- }
- }
- }
- }
- if (bias_data)
- {
- acc += bias_data[out_c];
- }
-
- int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
-
- scaled_acc += output()->zero_point();
- scaled_acc = std::max(scaled_acc, activation_min);
- scaled_acc = std::min(scaled_acc, activation_max);
- output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
- }
- }
- }
- }
-}
-
-void Conv2D::evalQuantizedS8PerChannel() const
-{
- int32_t activation_min{};
- int32_t activation_max{};
- calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
- tflite::ConvParams params{};
- params.padding_values.height = _padding_height;
- params.padding_values.width = _padding_width;
- params.stride_height = _params.stride_height;
- params.stride_width = _params.stride_width;
- params.dilation_height_factor = _params.dilation_height_factor;
- params.dilation_width_factor = _params.dilation_width_factor;
- // The kernel expects filter zero points to be negated.
- params.input_offset = -input()->zero_point(); // Note the '-'.
- params.weights_offset = 0; // Unused in tflite code
- params.output_offset = output()->zero_point();
- params.quantized_activation_min = activation_min;
- params.quantized_activation_max = activation_max;
-
- const std::vector<double> effective_output_scales =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
- std::vector<ChannelQuantMultipliers> quant_multipliers =
- quantizeMultipliers(effective_output_scales);
-
- std::vector<int32_t> shifts;
- std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
- [](ChannelQuantMultipliers cm) { return cm.shift; });
- std::vector<int32_t> multipliers;
- std::transform(quant_multipliers.begin(), quant_multipliers.end(),
- std::back_inserter(multipliers),
- [](ChannelQuantMultipliers cm) { return cm.multiplier; });
-
- auto scratchpad = getOutputTensors()[1];
- int8_t *scratchpad_data = nullptr;
- if (scratchpad->is_allocatable())
- scratchpad_data = scratchpad->data<int8_t>();
-
- luci_interpreter_pal::ConvPerChannel(
- params, multipliers.data(), shifts.data(), getTensorShape(input()),
- getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
- getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
- getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
-}
-
-void Conv2D::evalQuantizedS16() const
-{
- const auto *input_data = getTensorData<int16_t>(input());
- const auto *filter_data = getTensorData<int16_t>(filter());
- const auto *bias_data = getTensorData<int64_t>(bias());
- auto *output_data = getTensorData<int16_t>(output());
-
- const Shape &input_shape = input()->shape();
- const Shape &filter_shape = filter()->shape();
- const Shape &output_shape = output()->shape();
-
- const int32_t batches = input_shape.dim(0);
- const int32_t input_height = input_shape.dim(1);
- const int32_t input_width = input_shape.dim(2);
- const int32_t input_depth = input_shape.dim(3);
- const int32_t output_depth = filter_shape.dim(0);
- const int32_t filter_height = filter_shape.dim(1);
- const int32_t filter_width = filter_shape.dim(2);
- const int32_t output_height = output_shape.dim(1);
- const int32_t output_width = output_shape.dim(2);
-
- const int32_t stride_height = _params.stride_height;
- const int32_t stride_width = _params.stride_width;
- const int32_t dilation_height_factor = _params.dilation_height_factor;
- const int32_t dilation_width_factor = _params.dilation_width_factor;
-
- int32_t activation_min{};
- int32_t activation_max{};
- calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
- const std::vector<double> effective_output_scale =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
- const std::vector<ChannelQuantMultipliers> multipliers_raw =
- quantizeMultipliers(effective_output_scale);
- BroadcastableWrapper<ChannelQuantMultipliers> multipliers(multipliers_raw);
-
- for (int32_t batch = 0; batch < batches; ++batch)
- {
- for (int32_t out_y = 0; out_y < output_height; ++out_y)
- {
- for (int32_t out_x = 0; out_x < output_width; ++out_x)
- {
- for (int32_t out_c = 0; out_c < output_depth; ++out_c)
- {
- const int32_t in_y_origin = out_y * stride_height - _padding_height;
- const int32_t in_x_origin = out_x * stride_width - _padding_width;
- int64_t acc = 0;
- for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
- {
- for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
- {
- const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
- const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
- if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
- {
- for (int32_t in_c = 0; in_c < input_depth; ++in_c)
- {
- const int16_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
- const int16_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
- acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
- }
- }
- }
- }
- if (bias_data)
- {
- acc += bias_data[out_c];
- }
-
- int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
-
- scaled_acc = std::max(scaled_acc, activation_min);
- scaled_acc = std::min(scaled_acc, activation_max);
-
- output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
- }
- }
- }
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_CONV2D_H
-#define LUCI_INTERPRETER_KERNELS_CONV2D_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-#include <memory>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class Conv2D : public KernelWithParams<Conv2DParams>
-{
-public:
- Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- Tensor *scratchpad, const Conv2DParams ¶ms);
-
- const Tensor *input() const { return _inputs[0]; }
- const Tensor *filter() const { return _inputs[1]; }
- const Tensor *bias() const { return _inputs[2]; }
- Tensor *output() const { return _outputs[0]; }
-
- void configure() override;
- void execute() const override;
-
-private:
- void evalFloat() const;
- void evalQuantized() const;
- void evalQuantizedPerChannel() const;
- void evalQuantizedS8PerChannel() const;
- void evalQuantizedS16() const;
-
-private:
- int32_t _padding_height{};
- int32_t _padding_width{};
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_CONV2D_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Conv2D.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-class Conv2DTest : public ::testing::Test
-{
-protected:
- void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
-
- std::unique_ptr<IMemoryManager> _memory_manager;
-};
-
-TEST_F(Conv2DTest, Float)
-{
- Shape input_shape{1, 4, 3, 2};
- Shape filter_shape{2, 2, 2, 2};
- Shape bias_shape{2};
- std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
- };
- std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
- };
- std::vector<float> bias_data{1, 2};
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
- Tensor filter_tensor =
- makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
- Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 2;
- params.stride_width = 1;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::RELU;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- kernel.configure();
- _memory_manager->allocate_memory(im2col);
- _memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- std::vector<float> ref_output_data{
- 11, 16, 7, 20, // row = 0
- 0, 40, 0, 44, // row = 1
- };
- std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
- EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, FloatPointwise)
-{
- Shape input_shape{1, 2, 2, 2};
- Shape filter_shape{2, 1, 1, 2};
- Shape bias_shape{2};
- std::vector<float> input_data{
- 1, 2, // row = 0, col = 0
- 3, 4, // row = 0, col = 1
- 5, 6, // row = 1, col = 0
- 7, 8, // row = 1, col = 1
- };
- std::vector<float> filter_data{
- -1, 2, // out = 0
- -3, 4, // out = 1
- };
- std::vector<float> bias_data{1, 2};
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
- Tensor filter_tensor =
- makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
- Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 1;
- params.stride_width = 1;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::RELU;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- kernel.configure();
- _memory_manager->allocate_memory(im2col);
- _memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- std::vector<float> ref_output_data{
- 4, 7, 6, 9, // row = 0
- 8, 11, 10, 13, // row = 1
- };
- std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
- EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, FloatCheck)
-{
- Shape input_shape{2, 2, 4, 1};
- Shape filter_shape{3, 2, 2, 1};
- Shape bias_shape{3};
- std::vector<float> input_data{
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
- };
- std::vector<float> filter_data{
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
- };
- std::vector<float> bias_data{1, 2, 3};
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
- Tensor filter_tensor =
- makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
- Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 2;
- params.stride_width = 2;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::NONE;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- _memory_manager->allocate_memory(im2col);
- kernel.execute();
-
- std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
- };
- std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
- EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, Uint8)
-{
- std::vector<float> input_data{
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
- };
- std::vector<float> filter_data{
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
- };
- std::vector<float> bias_data{1, 2, 3};
-
- std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
- std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
-
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
- input_data, _memory_manager.get());
- Tensor filter_tensor =
- makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second,
- filter_data, _memory_manager.get());
- Tensor bias_tensor = makeInputTensor<DataType::S32>(
- {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
- Tensor im2col(DataType::U8, Shape({}), {}, "");
- Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 2;
- params.stride_width = 2;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::NONE;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- _memory_manager->allocate_memory(im2col);
- kernel.execute();
-
- std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
- };
- std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
- EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, Uint8_CWQ)
-{
- const int output_channels = 3;
- std::vector<float> input_data{
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
- };
- std::vector<float> filter_data{
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
- };
- std::vector<float> bias_data{1, 2, 3};
- Shape filter_shape{output_channels, 2, 2, 1};
-
- std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 4);
- std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
-
- std::vector<std::pair<float, int32_t>> filter_quant_params;
- filter_quant_params.push_back(quantizationParams<uint8_t>(0, 4));
- filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
- filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
-
- std::vector<float> filter_scales;
- std::vector<int32_t> filter_zerops;
- for (auto iter : filter_quant_params)
- {
- filter_scales.push_back(iter.first);
- filter_zerops.push_back(iter.second);
- }
-
- std::vector<float> bias_scales;
- for (int i = 0; i < output_channels; ++i)
- bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
- std::vector<int32_t> zerop(output_channels, 0);
-
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
- input_data, _memory_manager.get());
- Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
- 0, filter_data, _memory_manager.get());
- Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
- bias_data, _memory_manager.get());
- Tensor im2col(DataType::U8, Shape({}), {}, "");
- Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 2;
- params.stride_width = 2;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::NONE;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- _memory_manager->allocate_memory(im2col);
- kernel.execute();
-
- std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
- };
- std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
- EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, SInt8_CWQ)
-{
- const int output_channels = 3;
- std::vector<float> input_data{
- // First batch
- 1, 1, 1, 1, // row = 1
- 2, 2, 2, 2, // row = 2
- // Second batch
- 1, 2, 3, 4, // row = 1
- 1, 2, 3, 4, // row = 2
- };
- std::vector<float> filter_data{
- 1, 2, 3, 4, // first 2x2 filter
- -1, 1, -1, 1, // second 2x2 filter
- -1, -1, 1, 1, // third 2x2 filter
- };
- std::vector<float> bias_data{1, 2, 3};
- Shape filter_shape{output_channels, 2, 2, 1};
-
- std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4);
- std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
-
- std::vector<std::pair<float, int32_t>> filter_quant_params;
- filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
- filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
- filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
-
- std::vector<float> filter_scales;
- std::vector<int32_t> filter_zerops;
- for (auto iter : filter_quant_params)
- {
- filter_scales.push_back(iter.first);
- filter_zerops.push_back(iter.second);
- }
-
- std::vector<float> bias_scales;
- for (int i = 0; i < output_channels; ++i)
- bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
- std::vector<int32_t> zerop(output_channels, 0);
-
- Tensor input_tensor =
- makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
- input_data, _memory_manager.get());
- Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
- 0, filter_data, _memory_manager.get());
- Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
- bias_data, _memory_manager.get());
- Tensor im2col(DataType::S8, Shape({}), {}, "");
- Tensor output_tensor =
- makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 2;
- params.stride_width = 2;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::NONE;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- _memory_manager->allocate_memory(im2col);
- kernel.execute();
-
- std::vector<float> ref_output_data{
- 18, 2, 5, // first batch, left
- 18, 2, 5, // first batch, right
- 17, 4, 3, // second batch, left
- 37, 4, 3, // second batch, right
- };
- std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
- EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-TEST_F(Conv2DTest, SInt16)
-{
- Shape input_shape{1, 4, 3, 2};
- Shape filter_shape{2, 2, 2, 2};
- Shape bias_shape{2};
- std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
-
- std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
- };
- std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
- };
- std::vector<float> bias_data{1, 2};
- std::vector<float> ref_output_data{
- 11, 16, 7, 20, // row = 0
- 0, 40, 0, 44, // row = 1
- };
-
- Tensor input_tensor =
- makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
- Tensor filter_tensor =
- makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
- Tensor im2col(DataType::S16, Shape({}), {}, "");
- Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 2;
- params.stride_width = 1;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::RELU;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- _memory_manager->allocate_memory(im2col);
- kernel.execute();
-
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
- EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
-}
-
-TEST_F(Conv2DTest, SInt16_CWQ_weights)
-{
- Shape input_shape{1, 2, 2, 2}; // Batch x H x W x C
- Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels
- Shape bias_shape{3};
- std::vector<int32_t> ref_output_shape{1, 2, 2, 3};
-
- std::vector<float> input_data{
- 1, 2, // row = 0, col 0
- 3, 4, // row = 0, col 1
- 5, 6, // row = 1, col 0
- 7, 8, // row = 1, col 1
- };
- std::vector<float> filter_data{
- 4, -3, // out = 0
- 1, -3, // out = 1
- 5, -3, // out = 2
- };
- std::vector<float> bias_data{1, 10, 5};
- std::vector<float> ref_output_data{
- 0, 5, 4, // row 0, col 0
- 1, 1, 8, // row 0, col 1
- 3, 0, 12, // row 1, col 0
- 5, 0, 16, // row 1, col 1
- };
-
- float input_scale = 0.25f;
- float output_scale = 0.05f;
- std::vector<float> filter_scales = {0.25f, 0.2f, 0.1f};
- std::vector<float> bias_scales;
- for (int i = 0; i < filter_scales.size(); ++i)
- bias_scales.push_back(filter_scales[i] * input_scale);
- std::vector<int32_t> zerop = {0, 0, 0};
-
- Tensor input_tensor =
- makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
- Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
- filter_data, _memory_manager.get());
- Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
- _memory_manager.get());
- Tensor im2col(DataType::S16, Shape({}), {}, "");
- Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 1;
- params.stride_width = 1;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::RELU;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- _memory_manager->allocate_memory(im2col);
- kernel.execute();
-
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
- EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
-}
-
-TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG)
-{
- Shape input_shape{1, 4, 3, 2};
- Shape filter_shape{2, 2, 2, 2};
- Shape bias_shape{2};
- std::vector<int32_t> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
- };
- std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
- };
- std::vector<float> bias_data{1, 2};
- Tensor input_tensor =
- makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
- Tensor filter_tensor =
- makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
- Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 2;
- params.stride_width = 1;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::RELU;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(Conv2DTest, Invalid_Bias_Type_NEG)
-{
- Shape input_shape{1, 4, 3, 2};
- Shape filter_shape{2, 2, 2, 2};
- Shape bias_shape{2};
- std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
- };
- std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
- };
- std::vector<uint8_t> bias_data{1, 2};
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
- Tensor filter_tensor =
- makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
- Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get());
- Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 2;
- params.stride_width = 1;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::RELU;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(Conv2DTest, Invalid_Bias_Data_NEG)
-{
- Shape input_shape{1, 4, 3, 2};
- Shape filter_shape{2, 2, 2, 2};
- Shape bias_shape{3};
- std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
- };
- std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
- };
- std::vector<float> bias_data{1, 2, 3};
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
- Tensor filter_tensor =
- makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
- Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 2;
- params.stride_width = 1;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::RELU;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
-{
- Shape input_shape{1, 4, 6, 1};
- Shape filter_shape{2, 2, 2, 2};
- Shape bias_shape{2};
- std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
- };
- std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
- };
- std::vector<float> bias_data{1, 2};
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
- Tensor filter_tensor =
- makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
- Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 2;
- params.stride_width = 1;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::RELU;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG)
-{
- Shape input_shape{1, 4, 3, 2};
- Shape filter_shape{2, 2, 2, 2};
- Shape bias_shape{2};
- std::vector<float> input_data{
- 1, 2, 3, 4, 5, 6, // row = 0
- 7, 8, 9, 10, 11, 12, // row = 1
- 13, 14, 15, 16, 17, 18, // row = 2
- 19, 20, 21, 22, 23, 24, // row = 3
- };
- std::vector<float> filter_data{
- 1, 2, -3, -4, // out = 0, row = 0
- -5, 6, -7, 8, // out = 1, row = 0
- 4, -2, 3, -1, // out = 0, row = 1
- -8, -6, 7, 5, // out = 1, row = 1
- };
- std::vector<float> bias_data{1, 2};
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
- Tensor filter_tensor =
- makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
- Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- Conv2DParams params{};
- params.padding = Padding::VALID;
- params.stride_height = 2;
- params.stride_width = 1;
- params.dilation_height_factor = 1;
- params.dilation_width_factor = 1;
- params.activation = Activation::TANH;
-
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DepthToSpace.h"
-#include "Utils.h"
-#include "PALDepthToSpace.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams ¶ms)
- : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
-{
-}
-
-void DepthToSpace::configure()
-{
- LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
- LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
- output()->element_type() == DataType::U8)
- LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type())
- const int block_size = params().block_size;
- const int32_t input_height = input()->shape().dim(1);
- const int32_t input_width = input()->shape().dim(2);
- const int32_t input_channels = input()->shape().dim(3);
- int32_t output_height = input_height * block_size;
- int32_t output_width = input_width * block_size;
- int32_t output_channels = input_channels / block_size / block_size;
-
- LUCI_INTERPRETER_CHECK(input_height == output_height / block_size);
- LUCI_INTERPRETER_CHECK(input_width == output_width / block_size);
- LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size);
-
- Shape output_shape(4);
- output_shape.dim(0) = input()->shape().dim(0);
- output_shape.dim(1) = output_height;
- output_shape.dim(2) = output_width;
- output_shape.dim(3) = output_channels;
-
- output()->resize(output_shape);
-}
-
-void DepthToSpace::execute() const
-{
- tflite::DepthToSpaceParams op_params;
- op_params.block_size = params().block_size;
- switch (input()->element_type())
- {
- case DataType::FLOAT32:
- luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
- break;
- case DataType::U8:
- luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
- getTensorData<uint8_t>(input()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
- break;
- default:
- throw std::runtime_error("Unsupported Type.");
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/DepthwiseConv2D.h"
-
-#include "kernels/Utils.h"
-
-#include "PALDepthwiseConv2d.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
- Tensor *output, Tensor *scratchpad,
- const DepthwiseConv2DParams ¶ms)
- : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
-{
-}
-
-void DepthwiseConv2D::configure()
-{
- // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
- // | input filter bias output |
- // ----+---------------------------+
- // (1) | float float float float |
- // (2) | float int8 float float | hybrid
- // (3) | uint8 uint8 int32 uint8 | quantized
- // (4) | int8 int8 int32 int8 | quantized per channel
- // (5) | int16 int8 int64 int16 | quantized per channel 16x8
- //
- // We only support (1), (3) and (4) for now, and additionally the following:
- // | input filter bias output |
- // ----+---------------------------+
- // (5) | int16 int16 int64 int16 |
- //
- if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
- {
- LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
- }
- else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
- {
- LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
- }
- else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
- {
- LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
- LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) ==
- filter()->scales().size());
- for (auto zerop : filter()->zero_points())
- {
- LUCI_INTERPRETER_CHECK(zerop == 0);
- }
- LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
- }
- else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
- {
- LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
- }
- else
- {
- throw std::runtime_error("Unsupported type.");
- }
- LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
-
- const Shape &input_shape = input()->shape();
- const Shape &filter_shape = filter()->shape();
- LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
-
- const int32_t batches = input_shape.dim(0);
- const int32_t input_height = input_shape.dim(1);
- const int32_t input_width = input_shape.dim(2);
- // Filter format: [1, H, W, O].
- LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
- const int32_t filter_height = filter_shape.dim(1);
- const int32_t filter_width = filter_shape.dim(2);
- const int32_t channels_out = filter_shape.dim(3);
-
- LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
- bias()->shape().dim(0) == channels_out));
-
- const int32_t output_height =
- computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
- _params.dilation_height_factor);
- const int32_t output_width =
- computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
- _params.dilation_width_factor);
-
- _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
- input_height, filter_height, output_height);
- _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
- filter_width, output_width);
-
- output()->resize({batches, output_height, output_width, channels_out});
-
- tflite::DepthwiseParams params{};
-
- params.dilation_height_factor = _params.dilation_height_factor;
- params.dilation_width_factor = _params.dilation_width_factor;
-
- auto scratchpad = getOutputTensors()[1];
- luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
- getTensorShape(input()), getTensorShape(filter()),
- getTensorShape(output()));
-}
-
-void DepthwiseConv2D::execute() const
-{
- switch (input()->element_type())
- {
- case DataType::FLOAT32:
- if (filter()->element_type() == DataType::FLOAT32)
- {
- evalFloat();
- break;
- }
- throw std::runtime_error("Unsupported type.");
- case DataType::U8:
- if (filter()->scales().size() == 1)
- {
- evalQuantized();
- }
- else if (filter()->scales().size() > 1)
- {
- LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
- LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
- static_cast<size_t>(filter()->shape().dim(3)));
- evalQuantizedPerChannel();
- }
- break;
- case DataType::S8:
- evalQuantizedS8PerChannel();
- break;
- case DataType::S16:
- evalQuantizedS16();
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-void DepthwiseConv2D::evalFloat() const
-{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
- tflite::DepthwiseParams params{};
- params.padding_values.height = _padding_height;
- params.padding_values.width = _padding_width;
- params.stride_height = _params.stride_height;
- params.stride_width = _params.stride_width;
- params.dilation_height_factor = _params.dilation_height_factor;
- params.dilation_width_factor = _params.dilation_width_factor;
- params.depth_multiplier = _params.depth_multiplier;
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
-
- tflite::reference_ops::DepthwiseConv(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()));
-}
-
-void DepthwiseConv2D::evalQuantizedPerChannel() const
-{
- const auto *input_data = getTensorData<uint8_t>(input());
- const auto *filter_data = getTensorData<uint8_t>(filter());
- const auto *bias_data = getTensorData<int32_t>(bias());
- auto *output_data = getTensorData<uint8_t>(output());
-
- const Shape &input_shape = input()->shape();
- const Shape &filter_shape = filter()->shape();
- const Shape &output_shape = output()->shape();
-
- const int32_t batches = input_shape.dim(0);
- const int32_t input_height = input_shape.dim(1);
- const int32_t input_width = input_shape.dim(2);
- const int32_t input_depth = input_shape.dim(3);
- const int32_t filter_height = filter_shape.dim(1);
- const int32_t filter_width = filter_shape.dim(2);
- const int32_t output_height = output_shape.dim(1);
- const int32_t output_width = output_shape.dim(2);
-
- const int32_t stride_height = _params.stride_height;
- const int32_t stride_width = _params.stride_width;
- const int32_t dilation_height_factor = _params.dilation_height_factor;
- const int32_t dilation_width_factor = _params.dilation_width_factor;
- const int32_t depth_multiplier = _params.depth_multiplier;
-
- int32_t activation_min{};
- int32_t activation_max{};
- calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
- const std::vector<double> effective_output_scales =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
- std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
- quantizeMultipliers(effective_output_scales);
- BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
-
- for (int batch = 0; batch < batches; ++batch)
- {
- for (int out_y = 0; out_y < output_height; ++out_y)
- {
- for (int out_x = 0; out_x < output_width; ++out_x)
- {
- for (int in_channel = 0; in_channel < input_depth; ++in_channel)
- {
- for (int m = 0; m < depth_multiplier; ++m)
- {
- const int output_channel = m + in_channel * depth_multiplier;
- const int in_x_origin = (out_x * stride_width) - _padding_width;
- const int in_y_origin = (out_y * stride_height) - _padding_height;
- int32 acc = 0;
- for (int filter_y = 0; filter_y < filter_height; ++filter_y)
- {
- for (int filter_x = 0; filter_x < filter_width; ++filter_x)
- {
- const int in_x = in_x_origin + dilation_width_factor * filter_x;
- const int in_y = in_y_origin + dilation_height_factor * filter_y;
- // Zero padding by omitting the areas outside the image.
- const bool is_point_inside_image =
- (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
- if (is_point_inside_image)
- {
- int32 input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
- int32 filter_val =
- filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
- acc += (filter_val - filter()->zero_points()[output_channel]) *
- (input_val - input()->zero_point());
- }
- }
- }
- if (bias_data)
- {
- acc += bias_data[output_channel];
- }
- int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
- int output_shift = quant_multipliers[output_channel].shift;
- int32_t scaled_acc =
- tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
- scaled_acc += output()->zero_point();
- scaled_acc = std::max(scaled_acc, activation_min);
- scaled_acc = std::min(scaled_acc, activation_max);
- output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
- static_cast<uint8_t>(scaled_acc);
- }
- }
- }
- }
- }
-}
-
-void DepthwiseConv2D::evalQuantized() const
-{
- const auto input_scale = static_cast<double>(input()->scale());
- const auto filter_scale = static_cast<double>(filter()->scale());
- const auto output_scale = static_cast<double>(output()->scale());
-
- const double real_multiplier = input_scale * filter_scale / output_scale;
- int32_t output_multiplier{};
- int output_shift{};
- quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
-
- int32_t activation_min{};
- int32_t activation_max{};
- calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
- tflite::DepthwiseParams params{};
- params.padding_values.height = _padding_height;
- params.padding_values.width = _padding_width;
- params.stride_height = _params.stride_height;
- params.stride_width = _params.stride_width;
- params.dilation_height_factor = _params.dilation_height_factor;
- params.dilation_width_factor = _params.dilation_width_factor;
- params.depth_multiplier = _params.depth_multiplier;
- // The kernel expects input and filter zero points to be negated.
- params.input_offset = -input()->zero_point(); // Note the '-'.
- params.weights_offset = -filter()->zero_point(); // Note the '-'.
- params.output_offset = output()->zero_point();
- params.output_multiplier = output_multiplier;
- params.output_shift = output_shift;
- params.quantized_activation_min = activation_min;
- params.quantized_activation_max = activation_max;
-
- tflite::reference_ops::DepthwiseConv(
- params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
- getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
-}
-
-void DepthwiseConv2D::evalQuantizedS8PerChannel() const
-{
- int32_t activation_min{};
- int32_t activation_max{};
- calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
- tflite::DepthwiseParams params{};
-
- params.padding_type = tflite::PaddingType::kSame;
- params.padding_values.height = _padding_height;
- params.padding_values.width = _padding_width;
- params.stride_height = _params.stride_height;
- params.stride_width = _params.stride_width;
- params.dilation_height_factor = _params.dilation_height_factor;
- params.dilation_width_factor = _params.dilation_width_factor;
- params.depth_multiplier = _params.depth_multiplier;
- // The kernel expects input and filter zero points to be negated.
- params.input_offset = -input()->zero_point(); // Note the '-'.
- params.weights_offset = 0;
- params.output_offset = output()->zero_point();
- params.output_multiplier = 1; // unused in tflite code
- params.output_shift = 0; // unused in tflite code
- params.quantized_activation_min = activation_min;
- params.quantized_activation_max = activation_max;
-
- const std::vector<double> effective_output_scales =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
- std::vector<ChannelQuantMultipliers> quant_multipliers =
- quantizeMultipliers(effective_output_scales);
-
- std::vector<int32_t> shifts;
- std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
- [](ChannelQuantMultipliers cm) { return cm.shift; });
- std::vector<int32_t> multipliers;
- std::transform(quant_multipliers.begin(), quant_multipliers.end(),
- std::back_inserter(multipliers),
- [](ChannelQuantMultipliers cm) { return cm.multiplier; });
-
- auto scratchpad = getOutputTensors()[1];
- int8_t *scratchpad_data = nullptr;
- if (scratchpad->is_allocatable())
- scratchpad_data = scratchpad->data<int8_t>();
-
- luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
- params, multipliers.data(), shifts.data(), getTensorShape(input()),
- getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
- getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
- getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
-}
-
-void DepthwiseConv2D::evalQuantizedS16() const
-{
- const auto *input_data = getTensorData<int16_t>(input());
- const auto *filter_data = getTensorData<int16_t>(filter());
- const auto *bias_data = getTensorData<int64_t>(bias());
- auto *output_data = getTensorData<int16_t>(output());
-
- const Shape &input_shape = input()->shape();
- const Shape &filter_shape = filter()->shape();
- const Shape &output_shape = output()->shape();
-
- const int32_t batches = input_shape.dim(0);
- const int32_t input_height = input_shape.dim(1);
- const int32_t input_width = input_shape.dim(2);
- const int32_t input_depth = input_shape.dim(3);
- const int32_t filter_height = filter_shape.dim(1);
- const int32_t filter_width = filter_shape.dim(2);
- const int32_t output_height = output_shape.dim(1);
- const int32_t output_width = output_shape.dim(2);
-
- const int32_t stride_height = _params.stride_height;
- const int32_t stride_width = _params.stride_width;
- const int32_t dilation_height_factor = _params.dilation_height_factor;
- const int32_t dilation_width_factor = _params.dilation_width_factor;
- const int32_t depth_multiplier = _params.depth_multiplier;
-
- const std::vector<double> effective_output_scales =
- getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-
- std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
- quantizeMultipliers(effective_output_scales);
-
- BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
-
- int32_t activation_min{};
- int32_t activation_max{};
- calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
- for (int32_t batch = 0; batch < batches; ++batch)
- {
- for (int32_t out_y = 0; out_y < output_height; ++out_y)
- {
- for (int32_t out_x = 0; out_x < output_width; ++out_x)
- {
- for (int32_t in_c = 0; in_c < input_depth; ++in_c)
- {
- for (int32_t m = 0; m < depth_multiplier; ++m)
- {
- const int32_t out_c = m + in_c * depth_multiplier;
- const int32_t in_y_origin = out_y * stride_height - _padding_height;
- const int32_t in_x_origin = out_x * stride_width - _padding_width;
- int64_t acc = 0;
- for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
- {
- for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
- {
- const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
- const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
- if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
- {
- const int16_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
- const int16_t filter_val =
- filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
- acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
- }
- }
- }
- if (bias_data != nullptr)
- {
- acc += bias_data[out_c];
- }
-
- int32_t output_multiplier = quant_multipliers[out_c].multiplier;
- int output_shift = quant_multipliers[out_c].shift;
- int32_t scaled_acc =
- tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-
- scaled_acc = std::max(scaled_acc, activation_min);
- scaled_acc = std::min(scaled_acc, activation_max);
-
- output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
- }
- }
- }
- }
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Dequantize.h"
-#include "kernels/Utils.h"
-#include "PALDequantize.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers `input` as the single kernel input and `output` as the single kernel output.
-Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-// Validates tensor types and quantization parameters, then resizes the output to the
-// input's shape.
-void Dequantize::configure()
-{
-  // Accepted input element types: S8, U8 and S16.
-  LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 ||
-                         input()->element_type() == loco::DataType::U8 ||
-                         input()->element_type() == loco::DataType::S16);
-
-  // Exactly one scale is required on the input.
-  LUCI_INTERPRETER_CHECK(input()->scales().size() == 1);
-
-  // S16 inputs must additionally have a zero point of 0.
-  const bool is_s16_input = input()->element_type() == loco::DataType::S16;
-  if (is_s16_input)
-  {
-    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
-  }
-
-  LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
-
-  output()->resize(input()->shape());
-}
-
-// Dequantizes the input into the float output through the PAL implementation.
-// Throws std::runtime_error for any input element type other than U8, S8 or S16.
-void Dequantize::execute() const
-{
-  tflite::DequantizationParams op_params;
-  op_params.zero_point = input()->zero_point();
-  op_params.scale = input()->scale();
-
-  // Every supported type makes the same PAL call; only the typed input pointer differs.
-  const auto dequantize_from = [&](const auto *typed_input) {
-    luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), typed_input,
-                                     getTensorShape(output()), getTensorData<float>(output()));
-  };
-
-  switch (input()->element_type())
-  {
-    case loco::DataType::U8:
-      dequantize_from(getTensorData<uint8_t>(input()));
-      break;
-    case loco::DataType::S8:
-      dequantize_from(getTensorData<int8_t>(input()));
-      break;
-    case loco::DataType::S16:
-      dequantize_from(getTensorData<int16_t>(input()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Div.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/div.h>
-#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers `input1` and `input2` as kernel inputs and `output` as the kernel output,
-// and stores `params` for later use.
-Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params)
-  : KernelWithParams<DivParams>({input1, input2}, {output}, params)
-{
-}
-
-// Requires both inputs and the output to share one element type, then resizes the output
-// to the broadcast of the two input shapes.
-void Div::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
-
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-// Dispatches on the element type of the first input.
-// Throws std::runtime_error for types other than FLOAT32, S64, S32 and U8.
-void Div::execute() const
-{
-  switch (input1()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float division, using the broadcasting reference implementation when
-// ProcessBroadcastShapes reports that broadcasting is needed.
-void Div::evalFloat() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<float>(params, _params.activation);
-
-  // ProcessBroadcastShapes receives `params` by address so it can record broadcast details.
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastDivSlow(
-      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
-      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<float>(input1()),
-                               getTensorShape(input2()), getTensorData<float>(input2()),
-                               getTensorShape(output()), getTensorData<float>(output()));
-  }
-}
-
-// Integer division for element type T (instantiated above for int64_t and int32_t),
-// with the same broadcast/non-broadcast split as the float path.
-template <typename T> void Div::evalInteger() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<T>(params, _params.activation);
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastDivSlow(
-      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
-      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()),
-                               getTensorShape(input2()), getTensorData<T>(input2()),
-                               getTensorShape(output()), getTensorData<T>(output()));
-  }
-}
-
-// Quantized (uint8) division.
-void Div::evalQuantized() const
-{
-  const auto input1_scale = static_cast<double>(input1()->scale());
-  const auto input2_scale = static_cast<double>(input2()->scale());
-  const auto output_scale = static_cast<double>(output()->scale());
-
-  // Real-valued rescale factor, converted below into a fixed-point multiplier and shift.
-  const double real_output_multiplier = input1_scale / (input2_scale * output_scale);
-
-  int32_t output_multiplier{};
-  int output_shift{};
-
-  quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::ArithmeticParams params{};
-
-  params.input1_offset = -input1()->zero_point(); // Note the '-'.
-  params.input2_offset = -input2()->zero_point(); // Note the '-'.
-  params.output_offset = output()->zero_point();
-  params.output_multiplier = output_multiplier;
-  params.output_shift = output_shift;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastDivSlow(
-      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
-      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
-                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
-                               getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Elu.h"
-#include "kernels/Utils.h"
-
-#include "PALElu.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Registers `input` as the single kernel input and `output` as the single kernel output.
-Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-// Requires matching input/output element types and gives the output the input's shape.
-void Elu::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  output()->resize(input()->shape());
-}
-
-// Runs the PAL Elu implementation on float data.
-// Throws std::runtime_error for every element type other than FLOAT32.
-void Elu::execute() const
-{
-  if (input()->element_type() != DataType::FLOAT32)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()),
-                            getTensorShape(output()), getTensorData<float>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Equal.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Registers `x` and `y` as kernel inputs and `output` as the kernel output.
-Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
-
-// Requires both inputs to share an element type and the output to be BOOL, precomputes the
-// per-input multiplier/shift pairs for U8 inputs, and resizes the output to the broadcast shape.
-void Equal::configure()
-{
-  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
-  const bool is_quantized = x()->element_type() == DataType::U8;
-  if (is_quantized)
-  {
-    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
-    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
-  }
-
-  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-// Dispatches on the element type of `x`.
-// Throws std::runtime_error for types other than FLOAT32, S64, S32 and U8.
-void Equal::execute() const
-{
-  switch (x()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::S64:
-      evalInteger<int64_t>();
-      break;
-    case DataType::S32:
-      evalInteger<int32_t>();
-      break;
-    case DataType::U8:
-      evalQuantized();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Element-wise float comparison; the broadcasting variant is used when the shapes differ.
-void Equal::evalFloat() const
-{
-  const auto lhs = getTensorData<float>(x());
-  const auto rhs = getTensorData<float>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::Equal(op_params, getTensorShape(x()), lhs, getTensorShape(y()), rhs,
-                                 getTensorShape(output()), result);
-  }
-  else
-  {
-    tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), lhs,
-                                                getTensorShape(y()), rhs,
-                                                getTensorShape(output()), result);
-  }
-}
-
-// Element-wise integer comparison for element type T, using the "NoScaling" reference ops.
-template <typename T> void Equal::evalInteger() const
-{
-  const auto lhs = getTensorData<T>(x());
-  const auto rhs = getTensorData<T>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), lhs,
-                                          getTensorShape(y()), rhs, getTensorShape(output()),
-                                          result);
-  }
-  else
-  {
-    tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), lhs,
-                                                         getTensorShape(y()), rhs,
-                                                         getTensorShape(output()), result);
-  }
-}
-
-// Element-wise uint8 comparison using the multiplier/shift pairs computed in configure().
-void Equal::evalQuantized() const
-{
-  const auto lhs = getTensorData<uint8_t>(x());
-  const auto rhs = getTensorData<uint8_t>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.left_shift = 8;
-  // First input; the zero point is negated to form the offset.
-  op_params.input1_offset = -x()->zero_point(); // Note the '-'
-  op_params.input1_multiplier = _x_multiplier;
-  op_params.input1_shift = _x_shift;
-  // Second input, same treatment.
-  op_params.input2_offset = -y()->zero_point(); // Note the '-'
-  op_params.input2_multiplier = _y_multiplier;
-  op_params.input2_shift = _y_shift;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), lhs,
-                                            getTensorShape(y()), rhs, getTensorShape(output()),
-                                            result);
-  }
-  else
-  {
-    tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), lhs,
-                                                           getTensorShape(y()), rhs,
-                                                           getTensorShape(output()), result);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Exp.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/exp.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers `input` as the single kernel input and `output` as the single kernel output.
-Exp::Exp(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-// Requires matching input/output element types and gives the output the input's shape.
-void Exp::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  output()->resize(input()->shape());
-}
-
-// Only FLOAT32 is handled; any other element type throws std::runtime_error.
-void Exp::execute() const
-{
-  if (input()->element_type() != DataType::FLOAT32)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  evalFloat();
-}
-
-// Applies the reference Exp op over the flat element range shared by input and output.
-void Exp::evalFloat() const
-{
-  const int flat_size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
-  const float *source = getTensorData<float>(input());
-  float *destination = getTensorData<float>(output());
-
-  tflite::reference_ops::Exp(source, flat_size, destination);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ExpandDims.h"
-#include "kernels/Utils.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers `input` and `axis` as kernel inputs and `output` as the kernel output.
-ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output)
-  : Kernel({input, axis}, {output})
-{
-}
-
-// Reads the axis value, normalizes a negative axis, validates it against the input rank and
-// resizes the output to the input shape with an extra dimension of size 1 at that axis.
-// Throws std::runtime_error when the axis tensor is neither S32 nor S64.
-void ExpandDims::configure()
-{
-  // The axis is read through a single dereference below, so exactly one element must exist.
-  LUCI_INTERPRETER_CHECK(axis()->shape().num_elements() == 1);
-
-  // Keep the axis in 64 bits until it has been range-checked; narrowing an S64 value first
-  // could wrap an out-of-range axis into the valid range.
-  int64_t axis_value = 0;
-
-  switch (axis()->element_type())
-  {
-    case loco::DataType::S32:
-      axis_value = *getTensorData<int32_t>(axis());
-      break;
-    case loco::DataType::S64:
-      axis_value = *getTensorData<int64_t>(axis());
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-
-  const auto input_shape = input()->shape();
-  const int64_t input_rank = input_shape.num_dims();
-
-  // A negative axis counts from the end of the output shape (rank + 1 dimensions).
-  if (axis_value < 0)
-  {
-    axis_value += input_rank + 1;
-  }
-
-  LUCI_INTERPRETER_CHECK(axis_value <= input_rank and axis_value >= 0);
-
-  // Safe to narrow: the value is now known to lie in [0, input_rank].
-  const int32_t axis_index = static_cast<int32_t>(axis_value);
-
-  Shape output_shape(input_shape.num_dims() + 1);
-  for (int32_t i = 0; i < output_shape.num_dims(); ++i)
-  {
-    if (i < axis_index)
-    {
-      output_shape.dim(i) = input_shape.dim(i);
-    }
-    else if (i == axis_index)
-    {
-      output_shape.dim(i) = 1;
-    }
-    else
-    {
-      // Dimensions after the inserted axis come from the previous input index.
-      LUCI_INTERPRETER_CHECK(i >= 1);
-      output_shape.dim(i) = input_shape.dim(i - 1);
-    }
-  }
-
-  output()->resize(output_shape);
-}
-
-// Copies the input buffer to the output byte-for-byte; only the shape differs.
-void ExpandDims::execute() const
-{
-  // Just copy input to output
-  const auto *input_data = input()->data<void>();
-  auto *output_data = output()->data<void>();
-
-  const size_t element_size = getDataTypeSize(input()->element_type());
-  const int32_t num_elements = input()->shape().num_elements();
-  std::memcpy(output_data, input_data, num_elements * element_size);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
-#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Kernel taking two input tensors (data and axis) and producing one output tensor.
-class ExpandDims : public Kernel
-{
-public:
-  ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output);
-
-  // Kernel interface.
-  void configure() override;
-  void execute() const override;
-
-  // Output tensor (slot 0 of the kernel outputs).
-  Tensor *output() const { return _outputs[0]; }
-
-  // Data tensor (slot 0 of the kernel inputs).
-  const Tensor *input() const { return _inputs[0]; }
-
-  // Axis tensor (slot 1 of the kernel inputs).
-  const Tensor *axis() const { return _inputs[1]; }
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ExpandDims.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-// Fixture that gives every test a fresh TestMemoryManager.
-class ExpandDimsTest : public ::testing::Test
-{
-protected:
-  void SetUp() override
-  {
-    _memory_manager = std::make_unique<TestMemoryManager>();
-  }
-
-  std::unique_ptr<IMemoryManager> _memory_manager;
-};
-
-// Axis 0 on a 2x2 input: expects shape {1, 2, 2} and unchanged data.
-TEST_F(ExpandDimsTest, PositiveAxis)
-{
-  const std::vector<int32_t> values{-1, 1, -2, 2};
-  const std::vector<int32_t> axis_data{0};
-
-  Tensor input_tensor = makeInputTensor<DataType::S32>({2, 2}, values, _memory_manager.get());
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S32);
-
-  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(values));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2}));
-}
-
-// Axis -1 on a 2x2 input: expects shape {2, 2, 1} and unchanged data.
-TEST_F(ExpandDimsTest, NegAxis)
-{
-  const std::vector<int32_t> values{-1, 1, -2, 2};
-  const std::vector<int32_t> axis_data{-1};
-
-  Tensor input_tensor = makeInputTensor<DataType::S32>({2, 2}, values, _memory_manager.get());
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S32);
-
-  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
-  kernel.configure();
-  _memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(values));
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1}));
-}
-
-// A FLOAT32 axis tensor must make configure() throw.
-TEST_F(ExpandDimsTest, InvalidAxisType_NEG)
-{
-  const std::vector<int32_t> values{-1, 1, -2, 2};
-  const std::vector<float> axis_data{1.0f};
-
-  Tensor input_tensor = makeInputTensor<DataType::S32>({2, 2}, values, _memory_manager.get());
-  Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S32);
-
-  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-// Axis 3 on a rank-2 input is out of range and must make configure() throw.
-TEST_F(ExpandDimsTest, InvalidAxisValue_NEG)
-{
-  const std::vector<int32_t> values{-1, 1, -2, 2};
-  const std::vector<int32_t> axis_data{3};
-
-  Tensor input_tensor = makeInputTensor<DataType::S32>({2, 2}, values, _memory_manager.get());
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S32);
-
-  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
-  EXPECT_ANY_THROW(kernel.configure());
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Fill.h"
-#include "kernels/Utils.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers `dims` and `value` as kernel inputs and `output` as the kernel output.
-Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output)
-  : Kernel({dims, value}, {output})
-{
-}
-
-// Builds the output shape from the contents of the `dims` tensor (element type T) and
-// resizes the output. Throws std::runtime_error if any requested dimension is negative.
-template <typename T> void Fill::configureShape()
-{
-  const auto dims_data = getTensorData<T>(dims());
-  // `dims` is 1-D (checked in configure()), so its only extent is the output rank.
-  Shape output_shape(dims()->shape().dim(0));
-
-  for (int i = 0; i < output_shape.num_dims(); ++i)
-  {
-    T data = dims_data[i];
-    if (data < 0)
-      throw std::runtime_error("Fill dimensions must be >= 0");
-
-    output_shape.dim(i) = data;
-  }
-
-  output()->resize(output_shape);
-}
-
-// Validates the inputs and resizes the output according to `dims`.
-// Throws std::runtime_error when a check fails or `dims` has an unsupported type.
-void Fill::configure()
-{
-  const auto dims_shape = dims()->shape();
-  const auto value_shape = value()->shape();
-
-  // Make sure the 1st input tensor is 1-D
-  LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1);
-
-  // Make sure the 1st input tensor is int32 or int64
-  LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or
-                         dims()->element_type() == DataType::S64);
-
-  // Make sure the 2nd input tensor is a scalar
-  LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0);
-
-  // execute() reads the value buffer using the output's element type, so the two must match;
-  // otherwise the value bytes would be reinterpreted as a different type.
-  LUCI_INTERPRETER_CHECK(value()->element_type() == output()->element_type());
-
-  // Check zero point and scale for S16 and S8
-  if (value()->element_type() == loco::DataType::S16 or
-      value()->element_type() == loco::DataType::S8)
-  {
-    LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale());
-    LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point());
-
-    if (value()->element_type() == loco::DataType::S16)
-      LUCI_INTERPRETER_CHECK(value()->zero_point() == 0);
-  }
-
-  // Resize output
-  switch (dims()->element_type())
-  {
-    case DataType::S32:
-      configureShape<int32_t>();
-      break;
-    case DataType::S64:
-      configureShape<int64_t>();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Fills the output with the scalar `value`, dispatching on the output element type.
-// Throws std::runtime_error for types other than S8, S16, S32, S64 and FLOAT32.
-void Fill::execute() const
-{
-  switch (output()->element_type())
-  {
-    case DataType::S8:
-      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int8_t>(value()),
-                                  getTensorShape(output()), getTensorData<int8_t>(output()));
-      break;
-    case DataType::S16:
-      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()),
-                                  getTensorShape(output()), getTensorData<int16_t>(output()));
-      break;
-    case DataType::S32:
-      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()),
-                                  getTensorShape(output()), getTensorData<int32_t>(output()));
-      break;
-    case DataType::S64:
-      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()),
-                                  getTensorShape(output()), getTensorData<int64_t>(output()));
-      break;
-    case DataType::FLOAT32:
-      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()),
-                                  getTensorShape(output()), getTensorData<float>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Floor.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/floor.h>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Registers `input` as the single kernel input and `output` as the single kernel output.
-Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-// Requires matching input/output element types and gives the output the input's shape.
-void Floor::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  output()->resize(input()->shape());
-}
-
-// Only FLOAT32 is handled; any other element type throws std::runtime_error.
-void Floor::execute() const
-{
-  if (input()->element_type() != DataType::FLOAT32)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  evalFloat();
-}
-
-// Applies the reference Floor op from the float input to the float output.
-void Floor::evalFloat() const
-{
-  const float *source = getTensorData<float>(input());
-  float *destination = getTensorData<float>(output());
-
-  tflite::reference_ops::Floor(getTensorShape(input()), source, getTensorShape(output()),
-                               destination);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/FloorDiv.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
-#include <cmath>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Registers the two operands as kernel inputs and `output` as the kernel output.
-// NOTE(review): the parameters are named `input`/`alpha` while the rest of this file reads the
-// operands through x() and y() - presumably in this order; confirm against the header.
-FloorDiv::FloorDiv(const Tensor *input, const Tensor *alpha, Tensor *output)
-  : Kernel({input, alpha}, {output})
-{
-}
-
-// Requires both operands to have the output's element type and resizes the output to the
-// broadcast of the two operand shapes.
-void FloorDiv::configure()
-{
-  LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type());
-
-  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-// Only FLOAT32 is handled; any other element type throws std::runtime_error.
-void FloorDiv::execute() const
-{
-  if (x()->element_type() != DataType::FLOAT32)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  evalFloat();
-}
-
-// Computes floor(x / y) element-wise, rejecting any zero denominator first.
-void FloorDiv::evalFloat() const
-{
-  const auto x_data = getTensorData<float>(x());
-  const auto y_data = getTensorData<float>(y());
-
-  // Check the denominator
-  const int denominator_count = getTensorShape(y()).FlatSize();
-  for (int i = 0; i < denominator_count; ++i)
-  {
-    LUCI_INTERPRETER_CHECK(y_data[i] != 0);
-  }
-
-  // The quotient is formed in double precision before flooring.
-  const auto floor_of_quotient = [](float numerator, float denominator) -> float {
-    const double quotient = static_cast<double>(numerator) / static_cast<double>(denominator);
-    return std::floor(quotient);
-  };
-
-  const bool needs_broadcast = x()->shape() != y()->shape();
-  if (!needs_broadcast)
-  {
-    tflite::reference_ops::BinaryFunction<float, float, float>(
-      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
-      getTensorData<float>(output()), floor_of_quotient);
-  }
-  else
-  {
-    tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
-      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
-      getTensorData<float>(output()), floor_of_quotient);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/FullyConnected.h"
-
-#include "kernels/Utils.h"
-
-#include "PALFullyConnected.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Registers `input`, `weights` and `bias` as kernel inputs and `output` as the kernel output,
-// and stores `params` for later use. configure() treats a null `bias` as "no bias".
-FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias,
-                               Tensor *output, const FullyConnectedParams &params)
-  : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
-{
-}
-
-// Validates element types and shapes, then resizes the output.
-//
-// The weights type selects the expected input/output/bias types (U8, FLOAT32 or S8 weights;
-// S32 bias for the 8-bit cases). Weights must be 2-D; a present bias must have as many
-// elements as weights dimension 0. Throws std::runtime_error for unsupported weights types
-// or any failed check.
-void FullyConnected::configure()
-{
-  if (weights()->element_type() == DataType::U8)
-  {
-    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8);
-    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8);
-    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
-  }
-  else if (weights()->element_type() == DataType::FLOAT32)
-  {
-    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
-    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
-    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32);
-  }
-  else if (weights()->element_type() == DataType::S8)
-  {
-    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8);
-    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
-    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  const Shape &input_shape = input()->shape();
-  const Shape &weights_shape = weights()->shape();
-
-  LUCI_INTERPRETER_CHECK(weights_shape.num_dims() == 2);
-  // This single check covers the bias size; it was previously repeated further down.
-  LUCI_INTERPRETER_CHECK(bias() == nullptr ||
-                         bias()->shape().num_elements() == weights_shape.dim(0));
-
-  // weights dimension 1 is used as a divisor below; a zero extent would divide by zero.
-  LUCI_INTERPRETER_CHECK(weights_shape.dim(1) > 0);
-  LUCI_INTERPRETER_CHECK(input_shape.num_elements() % weights_shape.dim(1) == 0);
-  const int32_t batch_size = input_shape.num_elements() / weights_shape.dim(1);
-  const int32_t num_units = weights_shape.dim(0);
-
-  if (params().keep_num_dims == false)
-  {
-    // Flattened result: one row per batch item, one column per unit.
-    output()->resize({batch_size, num_units});
-  }
-  else
-  {
-    // The last dimension is replaced below, so the input needs at least one dimension.
-    LUCI_INTERPRETER_CHECK(input_shape.num_dims() >= 1);
-
-    // Keep the input's shape and swap only the last dimension for the unit count.
-    luci_interpreter::Shape output_shape(input_shape.num_dims());
-    for (int i = 0; i < input_shape.num_dims(); ++i)
-      output_shape.dim(i) = input_shape.dim(i);
-    output_shape.dim(input_shape.num_dims() - 1) = num_units;
-    output()->resize(output_shape);
-  }
-}
-
-// Dispatches on the input element type.
-// Throws std::runtime_error for types other than U8, S8 and FLOAT32.
-void FullyConnected::execute() const
-{
-  const auto input_type = input()->element_type();
-
-  if (input_type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else if (input_type == DataType::S8)
-  {
-    evalQuantizedS8();
-  }
-  else if (input_type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float path: derives the activation clamp range from the kernel parameters and calls the
-// reference FullyConnected implementation with the default weights format.
-void FullyConnected::evalFloat() const
-{
-  float clamp_min{};
-  float clamp_max{};
-  calculateActivationRange(_params.activation, &clamp_min, &clamp_max);
-
-  tflite::FullyConnectedParams op_params{};
-  op_params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
-  op_params.float_activation_min = clamp_min;
-  op_params.float_activation_max = clamp_max;
-
-  tflite::reference_ops::FullyConnected(
-    op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
-    getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
-    getTensorShape(output()), getTensorData<float>(output()));
-}
-
-void FullyConnected::evalQuantized() const
-{
- double real_multiplier = 0.0;
- int output_shift;
- int32_t output_activation_min;
- int32_t output_activation_max;
- int32_t output_multiplier;
- real_multiplier =
- getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
- quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
- calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
- &output_activation_max);
-
- int32_t input_offset = -input()->zero_point();
- int32_t filter_offset = -weights()->zero_point();
- int32_t output_offset = output()->zero_point();
-
- tflite::FullyConnectedParams op_params{};
- op_params.input_offset = input_offset;
- op_params.weights_offset = filter_offset;
- op_params.output_offset = output_offset;
- op_params.output_multiplier = output_multiplier;
- op_params.output_shift = output_shift;
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
- op_params.lhs_cacheable = false;
- op_params.rhs_cacheable = false;
- tflite::reference_ops::FullyConnected(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(weights()),
- getTensorData<uint8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
-}
-
-void FullyConnected::evalQuantizedS8() const
-{
- double real_multiplier = 0.0;
- int output_shift;
- int32_t output_activation_min;
- int32_t output_activation_max;
- int32_t output_multiplier;
- real_multiplier =
- getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
- quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
- calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
- &output_activation_max);
-
- int32_t input_offset = -input()->zero_point();
- int32_t filter_offset = -weights()->zero_point();
- int32_t output_offset = output()->zero_point();
-
- tflite::FullyConnectedParams op_params{};
- op_params.input_offset = input_offset;
- op_params.weights_offset = filter_offset;
- op_params.output_offset = output_offset;
- op_params.output_multiplier = output_multiplier;
- op_params.output_shift = output_shift;
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
- op_params.lhs_cacheable = false;
- op_params.rhs_cacheable = false;
- luci_interpreter_pal::FullyConnected<int8_t>(
- op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()),
- getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<int8_t>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
-#define LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class FullyConnected : public KernelWithParams<FullyConnectedParams>
-{
-public:
- FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias, Tensor *output,
- const FullyConnectedParams &params);
-
- const Tensor *input() const { return _inputs[0]; }
- const Tensor *weights() const { return _inputs[1]; }
- const Tensor *bias() const { return _inputs[2]; }
- Tensor *output() const { return _outputs[0]; }
-
- void configure() override;
- void execute() const override;
-
-private:
- void evalFloat() const;
- void evalQuantized() const;
- void evalQuantizedS8() const;
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/FullyConnected.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-template <typename T>
-void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
- std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
- std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
- std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
-{
- std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
- Tensor weights_tensor =
- makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- FullyConnectedParams params{};
- params.activation = Activation::RELU;
-
- FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
- kernel.configure();
- memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
- EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
-}
-
-template <>
-void Check<int8_t>(std::initializer_list<int32_t> input_shape,
- std::initializer_list<int32_t> weights_shape,
- std::initializer_list<int32_t> bias_shape,
- std::initializer_list<int32_t> output_shape,
- std::initializer_list<float> input_data,
- std::initializer_list<float> weights_data,
- std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
-{
- std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
- const float quantized_tolerance = getTolerance(-127, 128, 255);
- std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64);
- std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
- Tensor input_tensor =
- makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
- input_data, memory_manager.get());
- Tensor weights_tensor =
- makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second,
- weights_data, memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
- bias_data, memory_manager.get());
- Tensor output_tensor =
- makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
-
- FullyConnectedParams params{};
- params.activation = Activation::RELU;
-
- FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
- kernel.configure();
- memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
- EXPECT_THAT(dequantizeTensorData(output_tensor),
- FloatArrayNear(output_data, quantized_tolerance));
-}
-
-template <>
-void Check<uint8_t>(
- std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
- std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
- std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
- std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
-{
- std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
- const float quantized_tolerance = getTolerance(-127, 128, 255);
- std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
- std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
- input_data, memory_manager.get());
- Tensor weights_tensor =
- makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second,
- weights_data, memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
- bias_data, memory_manager.get());
- Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
-
- FullyConnectedParams params{};
- params.activation = Activation::RELU;
-
- FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
- kernel.configure();
- memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
- EXPECT_THAT(dequantizeTensorData(output_tensor),
- FloatArrayNear(output_data, quantized_tolerance));
-}
-
-template <typename T> class FullyConnectedTest : public ::testing::Test
-{
-};
-
-using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
-TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
-
-TYPED_TEST(FullyConnectedTest, Simple)
-{
- Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
- {
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
- },
- {
- -3, -7, 4, -4, -6, 4, // unit = 0
- 3, 5, 2, 3, -3, -8, // unit = 1
- -3, 7, 4, 9, 0, -5, // unit = 2
- },
- {-1, -5, -8},
- {
- 0, 0, 32, // batch = 0
- 22, 11, 47, // batch = 1
- });
-}
-
-TEST(FullyConnectedTest, InvalidBiasType_NEG)
-{
- Shape input_shape{3, 2, 2, 1};
- std::vector<float> input_data{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
- };
- Shape weights_shape{3, 6};
- std::vector<float> weights_data{
- -3, -7, 4, -4, -6, 4, // unit = 0
- 3, 5, 2, 3, -3, -8, // unit = 1
- -3, 7, 4, 9, 0, -5, // unit = 2
- };
- Shape bias_shape{3};
- std::vector<int32_t> bias_data{-1, -5, -8};
-
- std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
- Tensor weights_tensor =
- makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
- Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- FullyConnectedParams params{};
- params.activation = Activation::RELU;
-
- FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
-{
- Shape input_shape{3, 2, 2, 1};
- std::vector<float> input_data{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
- };
- Shape weights_shape{1, 3, 6};
- std::vector<float> weights_data{
- -3, -7, 4, -4, -6, 4, // unit = 0
- 3, 5, 2, 3, -3, -8, // unit = 1
- -3, 7, 4, 9, 0, -5, // unit = 2
- };
- Shape bias_shape{3};
- std::vector<float> bias_data{-1, -5, -8};
-
- std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
- Tensor weights_tensor =
- makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- FullyConnectedParams params{};
- params.activation = Activation::RELU;
-
- FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
-{
- Shape input_shape{3, 2, 2, 1};
- std::vector<float> input_data{
- -3, -5, 5, 4, 9, -2, // batch = 0
- -3, -2, -4, 9, -8, 1, // batch = 1
- };
- Shape weights_shape{6, 3};
- std::vector<float> weights_data{
- -3, -7, 4, // unit = 0
- -4, -6, 4, // unit = 1
- 3, 5, 2, // unit = 2
- 3, -3, -8, // unit = 3
- -3, 7, 4, // unit = 4
- 9, 0, -5, // unit = 5
- };
- Shape bias_shape{3};
- std::vector<float> bias_data{-1, -5, -8};
-
- std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
- Tensor weights_tensor =
- makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
- Tensor bias_tensor =
- makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- FullyConnectedParams params{};
- params.activation = Activation::RELU;
-
- FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Gather.h"
-#include "kernels/Utils.h"
-#include "PALGather.h"
-
-#include <stdexcept>
-#include <cassert>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
- const GatherParams &gparams)
- : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
-{
-}
-
-void Gather::configure()
-{
- if (params()->element_type() == DataType::FLOAT32)
- {
- LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
- }
- else
- {
- throw std::runtime_error("Unsupported type.");
- }
-
- LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
- indices()->element_type() == DataType::S64);
-
- // refer tensorflow/lite/kernels/gather.cc
-
- const Shape &params_shape = params()->shape();
- const Shape &indices_shape = indices()->shape();
-
- int axis = _params.axis;
- if (axis < 0)
- {
- axis += params_shape.num_dims();
- }
- LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
-
- int batch_dims = _params.batch_dims;
- // batch_dims should be in range: [-rank(indices), rank(indices)].
- // Negative batch_dims is added with rank of positions.
- if (batch_dims < 0)
- {
- batch_dims += indices_shape.num_dims();
- }
- LUCI_INTERPRETER_CHECK(batch_dims <= axis);
- LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
- LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
- for (int i = 0; i < batch_dims; ++i)
- {
- LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
- }
-
- const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
-
- Shape output_shape(num_dimensions);
- int output_index = 0;
- for (int i = 0; i < axis; ++i)
- {
- output_shape.dim(output_index++) = params_shape.dim(i);
- }
- for (int i = batch_dims; i < indices_shape.num_dims(); ++i)
- {
- output_shape.dim(output_index++) = indices_shape.dim(i);
- }
- for (int i = axis + 1; i < params_shape.num_dims(); ++i)
- {
- output_shape.dim(output_index++) = params_shape.dim(i);
- }
- output()->resize(output_shape);
-}
-
-void Gather::execute() const
-{
- switch (params()->element_type())
- {
- case DataType::FLOAT32:
- evalFloat();
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-void Gather::evalFloat() const
-{
- assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
-
- const auto params_data = getTensorData<float>(params());
- auto output_data = getTensorData<float>(output());
-
- tflite::GatherParams tparams;
- tparams.axis = _params.axis;
- tparams.batch_dims = _params.batch_dims;
-
- if (indices()->element_type() == DataType::S32)
- {
- const auto indices_data = getTensorData<int32_t>(indices());
-
- luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data,
- getTensorShape(indices()), indices_data,
- getTensorShape(output()), output_data);
- }
- else
- {
- const auto indices_data = getTensorData<int64_t>(indices());
-
- luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data,
- getTensorShape(indices()), indices_data,
- getTensorShape(output()), output_data);
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Greater.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
-
-void Greater::configure()
-{
- LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
- LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
- if (x()->element_type() == DataType::U8)
- {
- quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
- quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
- }
- output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-void Greater::execute() const
-{
- switch (x()->element_type())
- {
- case DataType::FLOAT32:
- evalFloat();
- break;
- case DataType::S64:
- evalInteger<int64_t>();
- break;
- case DataType::S32:
- evalInteger<int32_t>();
- break;
- case DataType::U8:
- evalQuantized();
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-void Greater::evalFloat() const
-{
- const auto x_data = getTensorData<float>(x());
- const auto y_data = getTensorData<float>(y());
- auto output_data = getTensorData<bool>(output());
-
- tflite::ComparisonParams op_params;
- op_params.is_broadcast = x()->shape() != y()->shape();
-
- if (op_params.is_broadcast)
- {
- tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), x_data,
- getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
- }
- else
- {
- tflite::reference_ops::Greater(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
- y_data, getTensorShape(output()), output_data);
- }
-}
-
-template <typename T> void Greater::evalInteger() const
-{
- const auto x_data = getTensorData<T>(x());
- const auto y_data = getTensorData<T>(y());
- auto output_data = getTensorData<bool>(output());
-
- tflite::ComparisonParams op_params;
- op_params.is_broadcast = x()->shape() != y()->shape();
-
- if (op_params.is_broadcast)
- {
- tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data,
- getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
- }
- else
- {
- tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data,
- getTensorShape(y()), y_data, getTensorShape(output()),
- output_data);
- }
-}
-
-void Greater::evalQuantized() const
-{
- const auto x_data = getTensorData<uint8_t>(x());
- const auto y_data = getTensorData<uint8_t>(y());
- auto output_data = getTensorData<bool>(output());
-
- tflite::ComparisonParams op_params;
- op_params.left_shift = 8;
- op_params.input1_offset = -x()->zero_point(); // Note the '-'
- op_params.input1_shift = _x_shift;
- op_params.input1_multiplier = _x_multiplier;
- op_params.input2_offset = -y()->zero_point(); // Note the '-'
- op_params.input2_shift = _y_shift;
- op_params.input2_multiplier = _y_multiplier;
- op_params.is_broadcast = x()->shape() != y()->shape();
-
- if (op_params.is_broadcast)
- {
- tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), x_data,
- getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
- }
- else
- {
- tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), x_data,
- getTensorShape(y()), y_data, getTensorShape(output()),
- output_data);
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/GreaterEqual.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output)
- : Kernel({x, y}, {output})
-{
-}
-
-void GreaterEqual::configure()
-{
- LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
- LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
- if (x()->element_type() == DataType::U8)
- {
- quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
- quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
- }
- output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-void GreaterEqual::execute() const
-{
- switch (x()->element_type())
- {
- case DataType::FLOAT32:
- evalFloat();
- break;
- case DataType::S64:
- evalInteger<int64_t>();
- break;
- case DataType::S32:
- evalInteger<int32_t>();
- break;
- case DataType::U8:
- evalQuantized();
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-void GreaterEqual::evalFloat() const
-{
- const auto x_data = getTensorData<float>(x());
- const auto y_data = getTensorData<float>(y());
- auto output_data = getTensorData<bool>(output());
-
- tflite::ComparisonParams op_params;
- op_params.is_broadcast = x()->shape() != y()->shape();
-
- if (op_params.is_broadcast)
- {
- tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), x_data,
- getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
- }
- else
- {
- tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
- y_data, getTensorShape(output()), output_data);
- }
-}
-
-template <typename T> void GreaterEqual::evalInteger() const
-{
- const auto x_data = getTensorData<T>(x());
- const auto y_data = getTensorData<T>(y());
- auto output_data = getTensorData<bool>(output());
-
- tflite::ComparisonParams op_params;
- op_params.is_broadcast = x()->shape() != y()->shape();
-
- if (op_params.is_broadcast)
- {
- tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
- op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
- output_data);
- }
- else
- {
- tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data,
- getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
- }
-}
-
-void GreaterEqual::evalQuantized() const
-{
- const auto x_data = getTensorData<uint8_t>(x());
- const auto y_data = getTensorData<uint8_t>(y());
- auto output_data = getTensorData<bool>(output());
-
- tflite::ComparisonParams op_params;
- op_params.left_shift = 8;
- op_params.input1_offset = -x()->zero_point(); // Note the '-'
- op_params.input1_shift = _x_shift;
- op_params.input1_multiplier = _x_multiplier;
- op_params.input2_offset = -y()->zero_point(); // Note the '-'
- op_params.input2_shift = _y_shift;
- op_params.input2_multiplier = _y_multiplier;
- op_params.is_broadcast = x()->shape() != y()->shape();
-
- if (op_params.is_broadcast)
- {
- tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
- op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
- output_data);
- }
- else
- {
- tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), x_data,
- getTensorShape(y()), y_data,
- getTensorShape(output()), output_data);
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/InstanceNorm.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/common.h>
-#include <cmath>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta,
- Tensor *output, const InstanceNormParams &params)
- : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
-{
-}
-
-void InstanceNorm::configure()
-{
- LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
- LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
- LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type());
- LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1);
- LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) ||
- gamma()->shape().dim(0) == 1);
- LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type());
- LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1);
- LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) ||
- beta()->shape().dim(0) == 1);
- output()->resize(input()->shape());
-}
-
-void InstanceNorm::execute() const
-{
- switch (input()->element_type())
- {
- case DataType::FLOAT32:
- evalFloat();
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-void InstanceNorm::evalFloat() const
-{
- float activation_min, activation_max;
- calculateActivationRange(params().activation, &activation_min, &activation_max);
- auto input_shape = getTensorShape(input());
- auto output_shape = getTensorShape(output());
- const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
- const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
- const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
- const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
- const float *input_data = getTensorData<float>(input());
- const float *gamma_data = getTensorData<float>(gamma());
- auto gamma_shape = getTensorShape(gamma());
- bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1;
- const float *beta_data = getTensorData<float>(beta());
- auto beta_shape = getTensorShape(beta());
- bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1;
- float *output_data = getTensorData<float>(output());
- for (int32_t batch = 0; batch < batches; batch++)
- {
- for (int32_t channel = 0; channel < channels; channel++)
- {
- double sum = 0.0f;
- double square_sum = 0.0f;
- int32_t size = heights * widths;
- for (int32_t height = 0; height < heights; height++)
- {
- for (int32_t width = 0; width < widths; width++)
- {
- double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)];
- sum += input_val;
- square_sum += (input_val * input_val);
- }
- }
- double mean = sum / size;
- double var = square_sum / size - mean * mean;
-
- double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
- double beta = single_beta ? beta_data[0] : beta_data[channel];
- double a = gamma / (std::sqrt(var + params().epsilon));
- double b = -mean * a + beta;
-
- for (int32_t height = 0; height < heights; height++)
- {
- for (int32_t width = 0; width < widths; width++)
- {
- double input_value =
- input_data[tflite::Offset(output_shape, batch, height, width, channel)];
- double output_value = input_value * a + b;
- output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
- tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
- activation_max);
- }
- }
- }
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/L2Normalize.h"
-#include "kernels/Utils.h"
-
-#include "PALL2Normalize.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Builds the kernel over one input and one output tensor. `params` is forwarded to
-// KernelWithParams and is read back through params().
-L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
-  : KernelWithParams<L2NormParams>({input}, {output}, params)
-{
-}
-
-// Validates the tensors and sizes the output.
-//
-// Requirements enforced below: input rank is at most 4; the output is FLOAT32 or U8 and the
-// input has the same element type; a U8 output must use scale 1/128 and zero point 128; no
-// fused activation is accepted. On success the output takes the input's shape.
-void L2Normalize::configure()
-{
- LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4);
- LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
- output()->element_type() == DataType::U8);
- LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
- if (output()->element_type() == DataType::U8)
- {
- // Exact floating-point comparison: only this one output quantization is accepted.
- LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.));
- LUCI_INTERPRETER_CHECK(output()->zero_point() == 128);
- }
- LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE);
- output()->resize(input()->shape());
-}
-
-// Runs the kernel, choosing the typed implementation from the output element type.
-// Throws std::runtime_error for any type other than FLOAT32 or U8.
-void L2Normalize::execute() const
-{
-  const auto type = output()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    // The float path passes a zero point of 0.
-    eval<float>(0);
-  }
-  else if (type == DataType::U8)
-  {
-    eval<uint8_t>(input()->zero_point());
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Typed implementation: hands the input/output buffers to the PAL L2Normalization routine,
-// with `zero_point` stored as the input zero point of the op parameters.
-template <typename T> void L2Normalize::eval(int32_t zero_point) const
-{
-  const auto src = getTensorData<T>(input());
-  auto dst = getTensorData<T>(output());
-
-  tflite::L2NormalizationParams op_params{};
-  op_params.input_zero_point = zero_point;
-
-  luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()), src,
-                                        getTensorShape(output()), dst);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/L2Pool2D.h"
-
-#include "kernels/Utils.h"
-
-#include "PALL2Pool2D.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Builds the kernel over one input and one output tensor. `params` (strides, filter size,
-// padding mode, activation) is forwarded to KernelWithParams and read back through params().
-L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
-  : KernelWithParams<Pool2DParams>({input}, {output}, params)
-{
-}
-
-// Validates the input, derives the pooled output size and padding, and resizes the output.
-//
-// The input must have 4 dimensions and the same element type as the output; only FLOAT32 is
-// accepted. Dimensions 0..3 of the input are used as batch, height, width and channel count.
-void L2Pool2D::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  int batches = input()->shape().dim(0);
-  int in_height = input()->shape().dim(1);
-  int in_width = input()->shape().dim(2);
-  int channels_out = input()->shape().dim(3);
-
-  // Matching GetWindowedOutputSize in TensorFlow.
-  auto padding = params().padding;
-
-  // Width axis: output extent first, then the padding that produces it (dilation fixed at 1).
-  int out_width =
-    computeOutputSize(padding, in_width, params().filter_width, params().stride_width, 1);
-  // Height axis, same computation.
-  int out_height =
-    computeOutputSize(padding, in_height, params().filter_height, params().stride_height, 1);
-
-  _padding_width =
-    computePadding(params().stride_width, 1, in_width, params().filter_width, out_width);
-  _padding_height =
-    computePadding(params().stride_height, 1, in_height, params().filter_height, out_height);
-
-  LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
-  output()->resize({batches, out_height, out_width, channels_out});
-}
-
-// Runs L2 pooling through the PAL implementation. Only FLOAT32 input is handled; any other
-// element type raises std::runtime_error.
-void L2Pool2D::execute() const
-{
-  if (input()->element_type() != DataType::FLOAT32)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  // Clamp range implied by the fused activation.
-  float act_min, act_max;
-  calculateActivationRange(params().activation, &act_min, &act_max);
-
-  tflite::PoolParams pool_params;
-  pool_params.float_activation_min = act_min;
-  pool_params.float_activation_max = act_max;
-  pool_params.filter_height = params().filter_height;
-  pool_params.filter_width = params().filter_width;
-  pool_params.stride_height = params().stride_height;
-  pool_params.stride_width = params().stride_width;
-  // Padding values were computed in configure().
-  pool_params.padding_values.height = _padding_height;
-  pool_params.padding_values.width = _padding_width;
-
-  luci_interpreter_pal::L2Pool(pool_params, getTensorShape(input()),
-                               getTensorData<float>(input()), getTensorShape(output()),
-                               getTensorData<float>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LeakyRelu.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
-
-#include "PALLeakyRelu.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Builds the kernel over one input and one output tensor. `params` (carrying alpha) is
-// forwarded to KernelWithParams and read back through params().
-LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params)
-  : KernelWithParams<LeakyReluParams>({input}, {output}, params)
-{
-}
-
-// Checks that input and output share an element type, precomputes the fixed-point
-// multiplier/shift pairs used by the U8 path, and gives the output the input's shape.
-void LeakyRelu::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  const bool is_u8 = input()->element_type() == DataType::U8;
-  if (is_u8)
-  {
-    // input_scale * alpha / output_scale
-    double alpha_ratio = input()->scale() * params().alpha / output()->scale();
-    quantizeMultiplier(alpha_ratio, &_output_multiplier_alpha, &_output_shift_alpha);
-
-    // input_scale / output_scale
-    double identity_ratio = input()->scale() / output()->scale();
-    quantizeMultiplier(identity_ratio, &_output_multiplier_identity, &_output_shift_identity);
-  }
-
-  output()->resize(input()->shape());
-}
-
-// Runs the kernel: FLOAT32 goes to evalFloat(), U8 to evalQuantized(); any other input
-// element type raises std::runtime_error.
-void LeakyRelu::execute() const
-{
-  const auto type = input()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float path: forwards alpha and the tensor buffers to the PAL LeakyRelu routine.
-void LeakyRelu::evalFloat() const
-{
-  const auto src = getTensorData<float>(input());
-  auto dst = getTensorData<float>(output());
-
-  tflite::LeakyReluParams op_params{};
-  op_params.alpha = params().alpha;
-
-  luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), src,
-                                  getTensorShape(output()), dst);
-}
-
-// U8 path: passes the zero points and the multiplier/shift pairs prepared in configure() to
-// the TFLite reference QuantizeLeakyRelu.
-void LeakyRelu::evalQuantized() const
-{
-  tflite::LeakyReluParams op_params{};
-
-  // Zero points of both tensors.
-  op_params.input_offset = input()->zero_point();
-  op_params.output_offset = output()->zero_point();
-
-  // Fixed-point rescale factors computed in configure().
-  op_params.output_multiplier_identity = _output_multiplier_identity;
-  op_params.output_shift_identity = _output_shift_identity;
-  op_params.output_multiplier_alpha = _output_multiplier_alpha;
-  op_params.output_shift_alpha = _output_shift_alpha;
-
-  const auto src = getTensorData<uint8_t>(input());
-  auto dst = getTensorData<uint8_t>(output());
-  tflite::reference_ops::QuantizeLeakyRelu(op_params, getTensorShape(input()), src,
-                                           getTensorShape(output()), dst);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Less.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Element-wise `x < y` kernel: two inputs (x, y) and one output.
-Less::Less(const Tensor *x, const Tensor *y, Tensor *output)
-  : Kernel({x, y}, {output})
-{
-}
-
-// Validates operand types and prepares quantization parameters.
-//
-// x and y must share an element type and the output must be BOOL. For U8 operands each input
-// scale is converted into a multiplier/shift pair stored in members, which evalQuantized()
-// reads later. The output takes the broadcast of the two input shapes.
-void Less::configure()
-{
- LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
- LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
- if (x()->element_type() == DataType::U8)
- {
- quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
- quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
- }
- output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-// Runs the comparison, selecting the implementation from the element type of x.
-// Supported: FLOAT32, S64, S32, U8; anything else raises std::runtime_error.
-void Less::execute() const
-{
-  const auto type = x()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (type == DataType::S64)
-  {
-    evalInteger<int64_t>();
-  }
-  else if (type == DataType::S32)
-  {
-    evalInteger<int32_t>();
-  }
-  else if (type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float comparison. Uses the plain reference kernel when both shapes are equal and the
-// 4-D broadcasting variant otherwise.
-void Less::evalFloat() const
-{
-  const auto lhs = getTensorData<float>(x());
-  const auto rhs = getTensorData<float>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::Less(op_params, getTensorShape(x()), lhs, getTensorShape(y()), rhs,
-                                getTensorShape(output()), result);
-    return;
-  }
-
-  tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), lhs,
-                                             getTensorShape(y()), rhs, getTensorShape(output()),
-                                             result);
-}
-
-// Integer comparison for element type T (instantiated from execute() for int64_t and
-// int32_t). Uses the "NoScaling" reference kernels, broadcasting when shapes differ.
-template <typename T> void Less::evalInteger() const
-{
-  const auto lhs = getTensorData<T>(x());
-  const auto rhs = getTensorData<T>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), lhs,
-                                         getTensorShape(y()), rhs, getTensorShape(output()),
-                                         result);
-    return;
-  }
-
-  tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), lhs,
-                                                      getTensorShape(y()), rhs,
-                                                      getTensorShape(output()), result);
-}
-
-// U8 comparison. Fills the rescaling parameters from the values prepared in configure() and
-// calls the "WithScaling" reference kernels, broadcasting when shapes differ.
-void Less::evalQuantized() const
-{
-  const auto lhs = getTensorData<uint8_t>(x());
-  const auto rhs = getTensorData<uint8_t>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.left_shift = 8;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  // First operand (x). The offset is the negated zero point.
-  op_params.input1_offset = -x()->zero_point(); // Note the '-'
-  op_params.input1_multiplier = _x_multiplier;
-  op_params.input1_shift = _x_shift;
-
-  // Second operand (y). The offset is the negated zero point.
-  op_params.input2_offset = -y()->zero_point(); // Note the '-'
-  op_params.input2_multiplier = _y_multiplier;
-  op_params.input2_shift = _y_shift;
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::LessWithScaling(op_params, getTensorShape(x()), lhs,
-                                           getTensorShape(y()), rhs, getTensorShape(output()),
-                                           result);
-    return;
-  }
-
-  tflite::reference_ops::Broadcast4DSlowLessWithScaling(op_params, getTensorShape(x()), lhs,
-                                                        getTensorShape(y()), rhs,
-                                                        getTensorShape(output()), result);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LessEqual.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Element-wise `x <= y` kernel: two inputs (x, y) and one output.
-LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output)
-  : Kernel({x, y}, {output})
-{
-}
-
-// Validates operand types and prepares quantization parameters.
-//
-// x and y must share an element type and the output must be BOOL. For U8 operands each input
-// scale is converted into a multiplier/shift pair stored in members, which evalQuantized()
-// reads later. The output takes the broadcast of the two input shapes.
-void LessEqual::configure()
-{
- LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
- LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
- if (x()->element_type() == DataType::U8)
- {
- quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
- quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
- }
- output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
-}
-
-// Runs the comparison, selecting the implementation from the element type of x.
-// Supported: FLOAT32, S64, S32, U8; anything else raises std::runtime_error.
-void LessEqual::execute() const
-{
-  const auto type = x()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (type == DataType::S64)
-  {
-    evalInteger<int64_t>();
-  }
-  else if (type == DataType::S32)
-  {
-    evalInteger<int32_t>();
-  }
-  else if (type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float comparison. Uses the plain reference kernel when both shapes are equal and the
-// 4-D broadcasting variant otherwise.
-void LessEqual::evalFloat() const
-{
-  const auto lhs = getTensorData<float>(x());
-  const auto rhs = getTensorData<float>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), lhs, getTensorShape(y()),
-                                     rhs, getTensorShape(output()), result);
-    return;
-  }
-
-  tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), lhs,
-                                                  getTensorShape(y()), rhs,
-                                                  getTensorShape(output()), result);
-}
-
-// Integer comparison for element type T (instantiated from execute() for int64_t and
-// int32_t). Uses the "NoScaling" reference kernels, broadcasting when shapes differ.
-template <typename T> void LessEqual::evalInteger() const
-{
-  const auto lhs = getTensorData<T>(x());
-  const auto rhs = getTensorData<T>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), lhs,
-                                              getTensorShape(y()), rhs,
-                                              getTensorShape(output()), result);
-    return;
-  }
-
-  tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), lhs,
-                                                           getTensorShape(y()), rhs,
-                                                           getTensorShape(output()), result);
-}
-
-// U8 comparison. Fills the rescaling parameters from the values prepared in configure() and
-// calls the "WithScaling" reference kernels, broadcasting when shapes differ.
-void LessEqual::evalQuantized() const
-{
-  const auto lhs = getTensorData<uint8_t>(x());
-  const auto rhs = getTensorData<uint8_t>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.left_shift = 8;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  // First operand (x). The offset is the negated zero point.
-  op_params.input1_offset = -x()->zero_point(); // Note the '-'
-  op_params.input1_multiplier = _x_multiplier;
-  op_params.input1_shift = _x_shift;
-
-  // Second operand (y). The offset is the negated zero point.
-  op_params.input2_offset = -y()->zero_point(); // Note the '-'
-  op_params.input2_multiplier = _y_multiplier;
-  op_params.input2_shift = _y_shift;
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::LessEqualWithScaling(op_params, getTensorShape(x()), lhs,
-                                                getTensorShape(y()), rhs,
-                                                getTensorShape(output()), result);
-    return;
-  }
-
-  tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling(
-    op_params, getTensorShape(x()), lhs, getTensorShape(y()), rhs, getTensorShape(output()),
-    result);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LocalResponseNormalization.h"
-
-#include "kernels/Utils.h"
-
-#include "PALLocalResponseNormalization.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Builds the kernel over one input and one output tensor. `params` (radius, bias, alpha,
-// beta) is forwarded to KernelWithParams and read back through params().
-LocalResponseNormalization::LocalResponseNormalization(
-  const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
-  : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
-{
-}
-
-// Validates the tensors and sizes the output: the input must have 4 dimensions, the output
-// must be FLOAT32, and both must share an element type. The output takes the input's shape.
-void LocalResponseNormalization::configure()
-{
- LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
- LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
- LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
- output()->resize(input()->shape());
-}
-
-// Runs the normalization through the PAL implementation. Only a FLOAT32 output is handled;
-// any other element type raises std::runtime_error.
-void LocalResponseNormalization::execute() const
-{
-  if (output()->element_type() != DataType::FLOAT32)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  // Copy the kernel attributes into the TFLite parameter struct (radius is stored as `range`).
-  tflite::LocalResponseNormalizationParams lrn_params;
-  lrn_params.range = params().radius;
-  lrn_params.bias = params().bias;
-  lrn_params.alpha = params().alpha;
-  lrn_params.beta = params().beta;
-
-  luci_interpreter_pal::LocalResponseNormalization(
-    lrn_params, getTensorShape(input()), getTensorData<float>(input()),
-    getTensorShape(output()), getTensorData<float>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LogSoftmax.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/log_softmax.h>
-
-#include "PALLogSoftmax.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Log-softmax kernel with one input and one output tensor.
-LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output)
-  : Kernel({input}, {output})
-{
-}
-
-// Validates the tensors, prepares the U8 lookup table, and sizes the output.
-//
-// Input and output must share an element type. For U8 the output must be quantized with
-// scale 16/256 and zero point 255; the PAL routine is then invoked with params.table pointing
-// at the member table `_table`, which evalQuantized() later passes on. The output takes the
-// input's shape.
-void LogSoftmax::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  if (input()->element_type() == DataType::U8)
-  {
-    // Exact floating-point comparison: only this one output quantization is accepted.
-    LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256);
-    LUCI_INTERPRETER_CHECK(output()->zero_point() == 255);
-
-    tflite::SoftmaxParams params{};
-
-    // Point the parameter struct at the member table before handing it to the PAL.
-    params.table = _table;
-    params.beta = 1.0;
-    luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
-  }
-  output()->resize(input()->shape());
-}
-
-// Runs the kernel: FLOAT32 goes to evalFloat(), U8 to evalQuantized(); any other input
-// element type raises std::runtime_error.
-void LogSoftmax::execute() const
-{
-  const auto type = input()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float path: calls the TFLite reference LogSoftmax with value-initialized parameters.
-void LogSoftmax::evalFloat() const
-{
-  const auto src = getTensorData<float>(input());
-  auto dst = getTensorData<float>(output());
-
-  tflite::SoftmaxParams params{};
-  tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), src,
-                                    getTensorShape(output()), dst);
-}
-
-// U8 path: builds the softmax parameters from the output quantization and the lookup table
-// prepared in configure(), then calls the PAL LogSoftmax.
-void LogSoftmax::evalQuantized() const
-{
-  const auto input_shape = getTensorShape(input());
-  const auto output_shape = getTensorShape(output());
-  const auto input_scale = input()->scale();
-  uint8_t *output_data = getTensorData<uint8_t>(output());
-  const uint8_t *input_data = getTensorData<uint8_t>(input());
-  const float beta = 1.0;
-
-  tflite::SoftmaxParams params{};
-
-  // This method is const, so `_table` is const here; the cast exists only because
-  // SoftmaxParams stores a non-const pointer.
-  params.table = const_cast<float *>(_table);
-  params.zero_point = output()->zero_point();
-  params.scale = output()->scale();
-
-  luci_interpreter_pal::InitializeParams(&params, input_scale, beta);
-  luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
-                                   output_data);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LogicalAnd.h"
-
-#include "kernels/Utils.h"
-
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Element-wise logical AND kernel: two inputs (input1, input2) and one output.
-LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
-{
-}
-
-// Requires both inputs and the output to share one element type, then sizes the output to the
-// broadcast of the two input shapes. The element type itself is only checked in execute().
-void LogicalAnd::configure()
-{
- LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
- LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
- output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-// Runs the kernel. Only BOOL inputs are handled; any other element type raises
-// std::runtime_error.
-void LogicalAnd::execute() const
-{
-  if (input1()->element_type() != DataType::BOOL)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-  evalLogicalAnd();
-}
-
-// Applies `x && y` to every pair of elements through the broadcasting binary-op helper.
-inline void LogicalAnd::evalLogicalAnd() const
-{
-  const auto lhs = getTensorData<bool>(input1());
-  const auto rhs = getTensorData<bool>(input2());
-  auto result = getTensorData<bool>(output());
-
-  BinaryOpBroadcastSlow(getTensorShape(input1()), lhs, getTensorShape(input2()), rhs,
-                        getTensorShape(output()), result,
-                        [](bool x, bool y) { return x && y; });
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LogicalNot.h"
-
-#include "kernels/Utils.h"
-
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Element-wise logical NOT kernel with one input and one output tensor.
-LogicalNot::LogicalNot(const Tensor *input, Tensor *output)
-  : Kernel({input}, {output})
-{
-}
-
-// Requires input and output to share an element type and gives the output the input's shape.
-// The element type itself is only checked in execute().
-void LogicalNot::configure()
-{
- LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
- output()->resize(input()->shape());
-}
-
-// Runs the kernel. Only BOOL input is handled; any other element type raises
-// std::runtime_error.
-void LogicalNot::execute() const
-{
-  if (input()->element_type() != DataType::BOOL)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-  evalLogicalNot();
-}
-
-// Writes the negation of every input element to the output, walking both buffers flat.
-inline void LogicalNot::evalLogicalNot() const
-{
-  // Element count agreed on by the input and output shapes.
-  const int count = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
-  bool *dst = getTensorData<bool>(output());
-  const bool *src = getTensorData<bool>(input());
-
-  for (int idx = 0; idx < count; ++idx)
-  {
-    dst[idx] = !src[idx];
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/LogicalOr.h"
-
-#include "kernels/Utils.h"
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Element-wise logical OR kernel: two inputs (input1, input2) and one output.
-LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
-{
-}
-
-// Requires both inputs to be BOOL (same element type, and input1 is BOOL), then sizes the
-// output to the broadcast of the two input shapes. The output element type is not checked
-// here.
-void LogicalOr::configure()
-{
- LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
- LUCI_INTERPRETER_CHECK(input1()->element_type() == DataType::BOOL);
- output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-// Applies `x || y` to every pair of elements through the broadcasting binary-op helper.
-// No type dispatch happens here; configure() has already required BOOL inputs.
-void LogicalOr::execute() const
-{
-  const auto lhs = getTensorData<bool>(input1());
-  const auto rhs = getTensorData<bool>(input2());
-  auto result = getTensorData<bool>(output());
-
-  BinaryOpBroadcastSlow(getTensorShape(input1()), lhs, getTensorShape(input2()), rhs,
-                        getTensorShape(output()), result,
-                        [](bool x, bool y) { return x || y; });
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Logistic.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/logistic.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Logistic (sigmoid) kernel with one input and one output tensor.
-Logistic::Logistic(const Tensor *input, Tensor *output)
-  : Kernel({input}, {output})
-{
-}
-
-// Validates the tensors, prepares the U8 lookup table, and sizes the output.
-//
-// Input and output must share an element type. For U8 the output scale must be exactly 1/256
-// and the 256-entry table is filled by populateLookupTable(). The output takes the input's
-// shape.
-void Logistic::configure()
-{
- LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
- if (input()->element_type() == DataType::U8)
- {
- // Exact floating-point comparison: only this one output scale is accepted.
- LUCI_INTERPRETER_CHECK(output()->scale() == 1. / 256);
- populateLookupTable();
- }
- output()->resize(input()->shape());
-}
-
-// Runs the kernel: FLOAT32 goes to evalFloat(), U8 to evalQuantized(); any other input
-// element type raises std::runtime_error.
-void Logistic::execute() const
-{
-  const auto type = input()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float path: delegates to the TFLite reference Logistic implementation.
-void Logistic::evalFloat() const
-{
-  const auto src = getTensorData<float>(input());
-  auto dst = getTensorData<float>(output());
-
-  tflite::reference_ops::Logistic(getTensorShape(input()), src, getTensorShape(output()), dst);
-}
-
-// U8 path: maps every input byte through the lookup table built in configure().
-void Logistic::evalQuantized() const
-{
-  // Element count agreed on by the input and output shapes.
-  const int count = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
-  uint8_t *dst = getTensorData<uint8_t>(output());
-  const uint8_t *src = getTensorData<uint8_t>(input());
-
-  for (int idx = 0; idx < count; ++idx)
-  {
-    dst[idx] = getTableValue(src[idx]);
-  }
-}
-
-// Fills the lookup table with the quantized logistic of every possible uint8 input code.
-//
-// For each code: dequantize with the input scale/zero point, apply 1 / (1 + exp(-x)),
-// divide by the output scale, round, add the output zero point, clamp to the uint8 range,
-// and store the result at index `code`.
-void Logistic::populateLookupTable()
-{
-  const auto in_scale = static_cast<double>(input()->scale());
-  const auto in_zero_point = static_cast<int32_t>(input()->zero_point());
-  const auto out_scale = static_cast<double>(output()->scale());
-  const auto out_zero_point = static_cast<int32_t>(output()->zero_point());
-  const float inv_out_scale = 1 / out_scale;
-
-  int32_t upper = std::numeric_limits<uint8_t>::max();
-  int32_t lower = std::numeric_limits<uint8_t>::min();
-
-  for (int32_t code = lower; code <= upper; ++code)
-  {
-    const float real_value = in_scale * (code - in_zero_point);
-    const float sigmoid = 1.0f / (1.0f + std::exp(-real_value));
-    const float scaled = std::round(sigmoid * inv_out_scale);
-    const int32_t requantized = static_cast<int32_t>(scaled + out_zero_point);
-    // Clamp into [lower, upper] before narrowing to a byte.
-    const int32_t clamped = std::max(std::min(upper, requantized), lower);
-    setTableValue(static_cast<uint8_t>(clamped), static_cast<uint8_t>(code));
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_LOGISTIC_H
-#define LUCI_INTERPRETER_KERNELS_LOGISTIC_H
-
-#include "core/Kernel.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Logistic kernel declaration: one input tensor, one output tensor, plus a 256-entry byte
-// lookup table used by the quantized path.
-class Logistic : public Kernel
-{
-public:
-  Logistic(const Tensor *input, Tensor *output);
-
-  // Kernel interface.
-  void configure() override;
-  void execute() const override;
-
-  // Accessors for the single input / output tensor.
-  const Tensor *input() const { return _inputs[0]; }
-  Tensor *output() const { return _outputs[0]; }
-
-private:
-  void populateLookupTable();
-  void evalFloat() const;
-  void evalQuantized() const;
-
-  // Table access; note the argument order of the setter is (value, index).
-  uint8_t getTableValue(uint8_t idx) const { return _table[idx]; }
-  void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; }
-
-private:
-  // One entry per possible uint8 input code; zero-initialized.
-  uint8_t _table[256]{};
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_LOGISTIC_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Logistic.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-// Generic test driver: builds an input tensor of element type T from `input_data`, runs the
-// Logistic kernel, and expects the output values to be near `output_data` and the output
-// shape to equal `output_shape`.
-template <typename T>
-void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
-           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
-{
-  std::unique_ptr<IMemoryManager> mem_mgr = std::make_unique<TestMemoryManager>();
-
-  Tensor in_tensor = makeInputTensor<getElementType<T>()>(input_shape, input_data, mem_mgr.get());
-  Tensor out_tensor = makeOutputTensor(getElementType<T>());
-
-  Logistic logistic(&in_tensor, &out_tensor);
-  logistic.configure();
-  // The output buffer is allocated only after configure() has set the output shape.
-  mem_mgr->allocate_memory(out_tensor);
-  logistic.execute();
-
-  EXPECT_THAT(extractTensorData<float>(out_tensor), FloatArrayNear(output_data));
-  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(output_shape));
-}
-
-// uint8_t specialization: quantizes the input using parameters derived from the min/max of
-// `input_data`, runs the kernel with an output quantized at scale 1/256 and zero point 0, and
-// compares the dequantized output against `output_data` within two output quantization steps.
-template <>
-void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
-                    std::initializer_list<int32_t> output_shape,
-                    std::initializer_list<float> input_data,
-                    std::initializer_list<float> output_data)
-{
-  std::unique_ptr<IMemoryManager> mem_mgr = std::make_unique<TestMemoryManager>();
-
-  // (scale, zero point) covering the range of the provided input values.
-  std::pair<float, int32_t> in_quant =
-    quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
-
-  Tensor in_tensor = makeInputTensor<DataType::U8>(input_shape, in_quant.first, in_quant.second,
-                                                   input_data, mem_mgr.get());
-  Tensor out_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
-
-  Logistic logistic(&in_tensor, &out_tensor);
-  logistic.configure();
-  // The output buffer is allocated only after configure() has set the output shape.
-  mem_mgr->allocate_memory(out_tensor);
-  logistic.execute();
-
-  EXPECT_THAT(dequantizeTensorData(out_tensor),
-              FloatArrayNear(output_data, out_tensor.scale() * 2));
-  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(output_shape));
-}
-
-// Empty typed-test fixture; the type parameter is forwarded to Check<T> by
-// the typed tests declared on this suite.
-template <typename T> class LogisticTest : public ::testing::Test
-{
-};
-
-// Element types the typed suite is instantiated for.
-using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_SUITE(LogisticTest, DataTypes);
-
-// Runs Check<TypeParam> on a 1-D tensor of 89 inputs evenly spaced over
-// [-10, 10] (step 20/88) together with the expected logistic value of each.
-TYPED_TEST(LogisticTest, Simple)
-{
- Check<TypeParam>(
- {89}, {89},
- {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
- -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
- -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
- -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
- -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
- -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
- -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
- -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818,
- 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455,
- 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091,
- 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727,
- 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364,
- 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000,
- 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636,
- 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000},
- // Expected outputs, one per input above, in the same order.
- {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
- 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
- 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
- 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
- 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
- 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
- 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
- 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
- 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
- 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
- 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
- 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
- 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
- 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
- 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
-}
-
-// Negative test: a FLOAT32 input is paired with a U8 output, and configure()
-// is expected to throw.
-// NOTE(review): "Ivalid" in the test name looks like a typo for "Invalid";
-// left unchanged because the name may be referenced by test filters.
-TEST(LogisticTest, IvalidInputOutputType_NEG)
-{
- std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
- Shape input_shape = {1};
- std::vector<float> input_data{10};
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
-
- Logistic kernel(&input_tensor, &output_tensor);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-// Negative test: input and output are both U8, but the output scale is 1/255
-// instead of the 1/256 used by the passing uint8_t check; configure() is
-// expected to throw.
-// NOTE(review): "Ivalid" in the test name looks like a typo for "Invalid";
-// left unchanged because the name may be referenced by test filters.
-TEST(LogisticTest, IvalidQuantParam_NEG)
-{
- std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
- Shape input_shape = {2};
- std::vector<float> input_data{-10, 10};
- std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
- input_data, memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0);
-
- Logistic kernel(&input_tensor, &output_tensor);
- EXPECT_ANY_THROW(kernel.configure());
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/MaxPool2D.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
-#include <tensorflow/lite/kernels/internal/reference/pooling.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Registers `input` as the single input and `output` as the single output of
-// the kernel and stores the pooling parameters in the base class.
-MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
-  : KernelWithParams<Pool2DParams>({input}, {output}, params)
-{
-}
-
-// Validates the input/output tensors, derives the output height/width and the
-// padding from the pooling parameters, and resizes the output tensor.
-// The input dimensions are read as (batches, height, width, depth).
-void MaxPool2D::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  // The rank used to be guarded by assert() only, which disappears under
-  // NDEBUG while dim(0)..dim(3) are read unconditionally below. Use the same
-  // check macro as the neighbouring validations instead.
-  // NOTE(review): presumably LUCI_INTERPRETER_CHECK stays active in release
-  // builds - confirm against kernels/Utils.h.
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
-  const Shape &input_shape = input()->shape();
-  const int32_t batches = input_shape.dim(0);
-  const int32_t input_height = input_shape.dim(1);
-  const int32_t input_width = input_shape.dim(2);
-  const int32_t depth = input_shape.dim(3);
-
-  const int32_t output_height =
-    computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
-  const int32_t output_width =
-    computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
-
-  // Padding is computed with a dilation argument of 1.
-  _padding_height =
-    computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
-  _padding_width =
-    computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
-
-  output()->resize({batches, output_height, output_width, depth});
-  if (input()->element_type() == DataType::U8)
-  {
-    // Quantized input and output must share scale (within 1e-6) and zero point.
-    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
-    LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
-  }
-  else if (input()->element_type() == DataType::S16)
-  {
-    // S16 additionally requires both zero points to be 0.
-    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
-    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
-  }
-}
-
-// Dispatches to the evaluation routine matching the input element type;
-// throws std::runtime_error for any other type.
-void MaxPool2D::execute() const
-{
-  const auto type = input()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else if (type == DataType::S16)
-  {
-    evalSInt16();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// FLOAT32 path: fills tflite::PoolParams from the kernel parameters, the
-// padding computed in configure() and the float activation range, then calls
-// the TFLite reference MaxPool.
-void MaxPool2D::evalFloat() const
-{
-  float act_min{};
-  float act_max{};
-  calculateActivationRange(_params.activation, &act_min, &act_max);
-
-  tflite::PoolParams op_params{};
-  op_params.float_activation_min = act_min;
-  op_params.float_activation_max = act_max;
-  op_params.filter_height = _params.filter_height;
-  op_params.filter_width = _params.filter_width;
-  op_params.stride_height = _params.stride_height;
-  op_params.stride_width = _params.stride_width;
-  op_params.padding_values.height = _padding_height;
-  op_params.padding_values.width = _padding_width;
-
-  const auto *src = getTensorData<float>(input());
-  auto *dst = getTensorData<float>(output());
-  tflite::reference_ops::MaxPool(op_params, getTensorShape(input()), src,
-                                 getTensorShape(output()), dst);
-}
-
-// U8 path: same parameter setup as the float path, but with the quantized
-// activation range derived from the output tensor; runs the TFLite reference
-// MaxPool on uint8_t data.
-void MaxPool2D::evalQuantized() const
-{
-  int32_t act_min{};
-  int32_t act_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &act_min, &act_max);
-
-  tflite::PoolParams op_params{};
-  op_params.quantized_activation_min = act_min;
-  op_params.quantized_activation_max = act_max;
-  op_params.filter_height = _params.filter_height;
-  op_params.filter_width = _params.filter_width;
-  op_params.stride_height = _params.stride_height;
-  op_params.stride_width = _params.stride_width;
-  op_params.padding_values.height = _padding_height;
-  op_params.padding_values.width = _padding_width;
-
-  const auto *src = getTensorData<uint8_t>(input());
-  auto *dst = getTensorData<uint8_t>(output());
-  tflite::reference_ops::MaxPool(op_params, getTensorShape(input()), src,
-                                 getTensorShape(output()), dst);
-}
-
-// S16 path: same parameter setup as the U8 path, but dispatches to the
-// TFLite reference_integer_ops MaxPool on int16_t data.
-void MaxPool2D::evalSInt16() const
-{
-  int32_t act_min{};
-  int32_t act_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &act_min, &act_max);
-
-  tflite::PoolParams op_params{};
-  op_params.quantized_activation_min = act_min;
-  op_params.quantized_activation_max = act_max;
-  op_params.filter_height = _params.filter_height;
-  op_params.filter_width = _params.filter_width;
-  op_params.stride_height = _params.stride_height;
-  op_params.stride_width = _params.stride_width;
-  op_params.padding_values.height = _padding_height;
-  op_params.padding_values.width = _padding_width;
-
-  const auto *src = getTensorData<int16_t>(input());
-  auto *dst = getTensorData<int16_t>(output());
-  tflite::reference_integer_ops::MaxPool(op_params, getTensorShape(input()), src,
-                                         getTensorShape(output()), dst);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
-#define LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// 2-D max-pooling kernel with one input tensor and one output tensor,
-// parameterized by Pool2DParams.
-class MaxPool2D : public KernelWithParams<Pool2DParams>
-{
-public:
-  MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
-
-  // The single input / output tensor registered with the kernel.
-  const Tensor *input() const { return _inputs[0]; }
-  Tensor *output() const { return _outputs[0]; }
-
-  void configure() override;
-  void execute() const override;
-
-private:
-  // Per-element-type evaluation routines.
-  void evalFloat() const;
-  void evalQuantized() const;
-  void evalSInt16() const;
-
-private:
-  // Padding along height and width; zero-initialized here.
-  int32_t _padding_height{};
-  int32_t _padding_width{};
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/MaxPool2D.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-// Test fixture that creates a fresh TestMemoryManager before every test.
-class MaxPool2DTest : public ::testing::Test
-{
-protected:
- void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
-
- // Memory manager used by the tests to create and allocate tensors.
- std::unique_ptr<IMemoryManager> _memory_manager;
-};
-
-// FLOAT32 max pooling of a 1x3x5x1 input with a 2x3 filter, strides (1, 2),
-// VALID padding and RELU6; the expected output is 1x2x2x1.
-TEST_F(MaxPool2DTest, Float)
-{
- Shape input_shape{1, 3, 5, 1};
- std::vector<float> input_data{
- 1, -1, 0, -2, 2, //
- -7, -6, -5, -4, -3, //
- 5, 4, 3, 6, 7, //
- };
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- Pool2DParams params{};
- params.padding = Padding::VALID;
- params.filter_height = 2;
- params.filter_width = 3;
- params.stride_height = 1;
- params.stride_width = 2;
- params.activation = Activation::RELU6;
-
- MaxPool2D kernel(&input_tensor, &output_tensor, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- // The last window's maximum is 7; the expected value is 6 under RELU6.
- std::vector<float> ref_output_data{
- 1, 2, //
- 5, 6, //
- };
- std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
- EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-// U8 max pooling of a 1x2x4x1 input with a 2x2 filter, stride 2, VALID
-// padding and RELU6. Input and output share the quantization parameters
-// derived for the range [-15.9375, 15.9375].
-TEST_F(MaxPool2DTest, Uint8)
-{
- std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
- std::vector<float> input_data{
- 0, -6, 12, 4, //
- -3, -2, 10, 7, //
- };
- Tensor input_tensor = makeInputTensor<DataType::U8>(
- {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
-
- Pool2DParams params{};
- params.padding = Padding::VALID;
- params.filter_height = 2;
- params.filter_width = 2;
- params.stride_height = 2;
- params.stride_width = 2;
- params.activation = Activation::RELU6;
-
- MaxPool2D kernel(&input_tensor, &output_tensor, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- // Window maxima are 0 and 12; the second is expected as 6 under RELU6.
- std::vector<float> ref_output_data{0.0, 6.0};
- std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1};
- EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
-}
-
-// S16 variant of the float test: same input data, filter, strides and
-// activation, with input and output quantized at scale 0.2 and zero point 0.
-TEST_F(MaxPool2DTest, SInt16)
-{
- Shape input_shape{1, 3, 5, 1};
- std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
- std::vector<float> input_data{
- 1, -1, 0, -2, 2, //
- -7, -6, -5, -4, -3, //
- 5, 4, 3, 6, 7, //
- };
- std::vector<float> ref_output_data{
- 1, 2, //
- 5, 6, //
- };
-
- Tensor input_tensor =
- makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
-
- Pool2DParams params{};
- params.padding = Padding::VALID;
- params.filter_height = 2;
- params.filter_width = 3;
- params.stride_height = 1;
- params.stride_width = 2;
- params.activation = Activation::RELU6;
-
- MaxPool2D kernel(&input_tensor, &output_tensor, params);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
- EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Maximum.h"
-
-#include "kernels/Utils.h"
-
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers the two operands as kernel inputs and `output` as the only output.
-Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
-{
-}
-
-// Requires both inputs and the output to share one element type and resizes
-// the output to the broadcast of the two input shapes.
-void Maximum::configure()
-{
-  // Terminate the check statements with ';' like every other use of the macro
-  // in the kernels, so the code does not depend on the macro body supplying
-  // its own statement terminator.
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-// Selects the evalMaximum instantiation from the element type of the first
-// input; throws std::runtime_error for anything but FLOAT32 and U8.
-void Maximum::execute() const
-{
-  const auto type = input1()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    evalMaximum<float>();
-  }
-  else if (type == DataType::U8)
-  {
-    evalMaximum<uint8_t>();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Applies std::max pairwise to the two inputs through BinaryOpBroadcastSlow
-// and writes the results to the output tensor.
-template <typename T> inline void Maximum::evalMaximum() const
-{
-  const auto larger_of = [](T lhs, T rhs) { return std::max(lhs, rhs); };
-
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
-                        getTensorShape(input2()), getTensorData<T>(input2()),
-                        getTensorShape(output()), getTensorData<T>(output()), larger_of);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Mean.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/reduce.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Copies `num_axes` axis indices into params->axis (cast to int16), sets
-// params->axis_count, and fills the remaining slots up to index 3 with 1.
-static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
-{
-  params->axis_count = num_axes;
-
-  int slot = 0;
-  for (; slot < num_axes; ++slot)
-  {
-    params->axis[slot] = static_cast<int16>(axes_data[slot]);
-  }
-  // Continue from where the copy stopped; no-op when num_axes >= 4.
-  for (; slot < 4; ++slot)
-  {
-    params->axis[slot] = 1;
-  }
-}
-
-// Counts the distinct axes named in `axes_data`. Negative entries are
-// normalized by adding `input_num_dims` before comparison, so e.g. 1 and -3
-// refer to the same axis of a 4-D input and are counted once.
-static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
-{
-  const auto normalize = [input_num_dims](int axis) {
-    return axis >= 0 ? axis : axis + input_num_dims;
-  };
-
-  int duplicates = 0;
-  for (int i = 0; i < num_axes; ++i)
-  {
-    const int current = normalize(axes_data[i]);
-    assert(current >= 0 && current < input_num_dims);
-
-    // An entry is a duplicate when any earlier entry resolves to the same axis.
-    bool seen_before = false;
-    for (int j = 0; j < i && !seen_before; ++j)
-    {
-      seen_before = (normalize(axes_data[j]) == current);
-    }
-    if (seen_before)
-    {
-      ++duplicates;
-    }
-  }
-  return num_axes - duplicates;
-}
-
-// Computes the shape of the reduction result.
-//  - A rank-0 input yields Shape(0).
-//  - With keep_dims, the rank is preserved and every reduced axis becomes 1.
-//  - Otherwise the reduced axes are dropped and the remaining dimensions keep
-//    their relative order.
-// An axis entry matches dimension `idx` either directly or after adding the
-// input rank (negative indexing).
-static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
-                            bool keep_dims)
-{
-  const int input_num_dims = input_shape.num_dims();
-  if (input_num_dims == 0)
-  {
-    return Shape(0);
-  }
-
-  const auto is_reduced = [&](int idx) {
-    for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
-    {
-      const int32_t axis = axes_data[axis_idx];
-      if (axis == idx || axis + input_num_dims == idx)
-      {
-        return true;
-      }
-    }
-    return false;
-  };
-
-  if (keep_dims)
-  {
-    Shape output_shape(input_num_dims);
-    for (int idx = 0; idx < input_num_dims; ++idx)
-    {
-      output_shape.dim(idx) = is_reduced(idx) ? 1 : input_shape.dim(idx);
-    }
-    return output_shape;
-  }
-
-  const int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
-  Shape output_shape(input_num_dims - num_reduce_axes);
-  // Next free output dimension; equals idx minus the reduced axes seen so far.
-  int out_idx = 0;
-  for (int idx = 0; idx < input_num_dims; ++idx)
-  {
-    if (!is_reduced(idx))
-    {
-      output_shape.dim(out_idx) = input_shape.dim(idx);
-      ++out_idx;
-    }
-  }
-  return output_shape;
-}
-
-// Registers `input` and `axes` as kernel inputs. The kernel outputs are, in
-// order: the result tensor followed by three scratch tensors (temp_index,
-// resolved_axes, temp_sum), which are later looked up as output tensors 1..3.
-Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
-           Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
-  : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
-                                    params)
-{
-}
-
-// Validates the tensors, resizes the output to the reduced shape and decides
-// whether the scratch tensors are needed. They are not needed for the
-// specialized case: keep_dims on a 4-D input reduced over axes {1, 2}.
-void Mean::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
-  if (input()->element_type() == DataType::S16)
-  {
-    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
-  }
-
-  const Shape &input_shape = input()->shape();
-  int input_num_dims = input_shape.num_dims();
-
-  const auto *axes_data = getTensorData<int32_t>(axes());
-  int num_axes = axes()->shape().num_elements();
-  // resolveAxes() stores one entry per axis into params.axis, so an oversized
-  // axes tensor must be rejected in every build type; the previous assert()
-  // disappears under NDEBUG.
-  // NOTE(review): assumes tflite::MeanParams::axis holds 4 entries - confirm.
-  LUCI_INTERPRETER_CHECK(num_axes <= 4);
-
-  Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
-  output()->resize(output_shape);
-
-  tflite::MeanParams params{};
-  resolveAxes(axes_data, num_axes, &params);
-  _need_temporaries = !(
-    _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
-    ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
-
-  auto temp_index = getOutputTensors()[1];
-  auto resolved_axes = getOutputTensors()[2];
-  auto temp_sum = getOutputTensors()[3];
-  if (_need_temporaries)
-  {
-    temp_index->resize(Shape(input_num_dims));
-    resolved_axes->resize(Shape(num_axes));
-    temp_sum->resize(output()->shape());
-  }
-  else
-  {
-    // The specialized path does not use the scratch tensors.
-    temp_index->set_allocatable(false);
-    resolved_axes->set_allocatable(false);
-    temp_sum->set_allocatable(false);
-  }
-}
-
-// Dispatches on the input element type; throws std::runtime_error for types
-// other than FLOAT32, U8 and S16.
-void Mean::execute() const
-{
-  const auto type = input()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else if (type == DataType::S16)
-  {
-    evalQuantizedS16();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// FLOAT32 mean. Uses the specialized TFLite routine for keep_dims on a 4-D
-// input reduced over axes {1, 2}; otherwise falls back to the generic
-// reference Mean, which takes the three scratch tensors.
-void Mean::evalFloat() const
-{
-  const Shape &input_shape = input()->shape();
-  int input_num_dims = input_shape.num_dims();
-  const auto *axes_data = getTensorData<int32_t>(axes());
-  int num_axes = axes()->shape().num_elements();
-
-  tflite::MeanParams params{};
-  resolveAxes(axes_data, num_axes, &params);
-
-  auto temp_index = getOutputTensors()[1];
-  auto resolved_axes = getOutputTensors()[2];
-  auto temp_sum = getOutputTensors()[3];
-
-  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
-  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
-      ((params.axis[0] == 1 && params.axis[1] == 2) ||
-       (params.axis[0] == 2 && params.axis[1] == 1)))
-  {
-    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
-                                getTensorShape(output()), getTensorData<float>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
-                                input()->shape().num_dims(), getTensorData<float>(output()),
-                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
-                                axes_data, num_axes, _params.keep_dims,
-                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
-                                getTensorData<float>(temp_sum));
-  }
-}
-
-// U8 mean with three paths:
-//  1. keep_dims on a 4-D input reduced over axes {1, 2}: specialized routine
-//     that takes input and output quantization parameters;
-//  2. identical input/output zero point and scale: generic reference Mean on
-//     the raw uint8_t values;
-//  3. otherwise: QuantizedMeanOrSum with compute_sum == false.
-void Mean::evalQuantized() const
-{
-  const Shape &input_shape = input()->shape();
-  int input_num_dims = input_shape.num_dims();
-  const auto *axes_data = getTensorData<int32_t>(axes());
-  int num_axes = axes()->shape().num_elements();
-
-  tflite::MeanParams params{};
-  resolveAxes(axes_data, num_axes, &params);
-
-  auto temp_index = getOutputTensors()[1];
-  auto resolved_axes = getOutputTensors()[2];
-  auto temp_sum = getOutputTensors()[3];
-
-  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
-  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
-      ((params.axis[0] == 1 && params.axis[1] == 2) ||
-       (params.axis[0] == 2 && params.axis[1] == 1)))
-  {
-    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-                                input()->zero_point(), input()->scale(), getTensorShape(output()),
-                                getTensorData<uint8_t>(output()), output()->zero_point(),
-                                output()->scale());
-  }
-  else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
-  {
-    tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
-                                input()->shape().num_dims(), getTensorData<uint8_t>(output()),
-                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
-                                axes_data, num_axes, _params.keep_dims,
-                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
-                                getTensorData<int>(temp_sum));
-  }
-  else
-  {
-    tflite::reference_ops::QuantizedMeanOrSum<>(
-      getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
-      getTensorShape(input()).DimsData(), input()->shape().num_dims(),
-      getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
-      getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
-      _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
-      getTensorData<int>(temp_sum),
-      /*compute_sum=*/false);
-  }
-}
-
-// S16 mean. Only keep_dims on a 4-D input reduced over axes {1, 2} is
-// handled; any other configuration throws std::runtime_error. For each
-// (batch, channel) the height x width window is summed, rescaled by
-// input_scale / output_scale, divided by the window size with rounding, and
-// clamped to [-32767, 32767].
-void Mean::evalQuantizedS16() const
-{
- const auto *input_data = getTensorData<int16_t>(input());
- auto *output_data = getTensorData<int16_t>(output());
-
- const Shape &input_shape = input()->shape();
- const Shape &output_shape = output()->shape();
-
- const auto *axes_data = getTensorData<int32_t>(axes());
- const int num_axes = axes()->shape().num_elements();
-
- // Symmetric clamp range: the minimum is -max, not the int16_t minimum.
- constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
- constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
-
- // Defer to specialized implementation for 4D Mean across axes 1 & 2.
- if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
- ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
- {
- const int32_t batches = input_shape.dim(0);
- const int32_t input_height = input_shape.dim(1);
- const int32_t input_width = input_shape.dim(2);
- const int32_t depth = input_shape.dim(3);
- assert(output_shape.num_dims() == 4);
- assert(output_shape.dim(0) == batches);
- assert(output_shape.dim(1) == 1);
- assert(output_shape.dim(2) == 1);
- assert(output_shape.dim(3) == depth);
-
- // Ratio of input scale to output scale, converted to multiplier + shift.
- const double real_multiplier =
- static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
-
- int32_t output_multiplier{};
- int output_shift{};
- quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
-
- const int32_t num_elements_in_axes = input_height * input_width;
-
- for (int32_t batch = 0; batch < batches; ++batch)
- {
- for (int32_t c = 0; c < depth; ++c)
- {
- // 32-bit sum of all elements in the height x width window.
- int32_t acc = 0;
- for (int32_t in_y = 0; in_y < input_height; ++in_y)
- {
- for (int32_t in_x = 0; in_x < input_width; ++in_x)
- {
- acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
- }
- }
- int32_t scaled_acc =
- tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
- // Divide by the number of elements rounding to the nearest integer.
- scaled_acc = scaled_acc > 0
- ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
- : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
-
- scaled_acc = std::max(scaled_acc, output_min);
- scaled_acc = std::min(scaled_acc, output_max);
-
- output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
- }
- }
- }
- else
- {
- throw std::runtime_error("Unsupported configuration.");
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Minimum.h"
-
-#include "kernels/Utils.h"
-
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers the two operands as kernel inputs and `output` as the only output.
-Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
-{
-}
-
-// Requires both inputs and the output to share one element type and resizes
-// the output to the broadcast of the two input shapes.
-void Minimum::configure()
-{
-  // Terminate the check statements with ';' like every other use of the macro
-  // in the kernels, so the code does not depend on the macro body supplying
-  // its own statement terminator.
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-// Selects the evalMinimum instantiation from the element type of the first
-// input; throws std::runtime_error for anything but FLOAT32 and U8.
-void Minimum::execute() const
-{
-  const auto type = input1()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    evalMinimum<float>();
-  }
-  else if (type == DataType::U8)
-  {
-    evalMinimum<uint8_t>();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Applies std::min pairwise to the two inputs through BinaryOpBroadcastSlow
-// and writes the results to the output tensor.
-template <typename T> inline void Minimum::evalMinimum() const
-{
-  const auto smaller_of = [](T lhs, T rhs) { return std::min(lhs, rhs); };
-
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
-                        getTensorShape(input2()), getTensorData<T>(input2()),
-                        getTensorShape(output()), getTensorData<T>(output()), smaller_of);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/MirrorPad.h"
-
-#include "kernels/Utils.h"
-
-#include <limits>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers `input` and `paddings` as kernel inputs and `output` as the only
-// output, and stores the mirror-pad parameters in the base class.
-MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
-                     const MirrorPadParams &params)
-  : KernelWithParams<MirrorPadParams>({input, paddings}, {output}, params)
-{
-}
-
-// Checks the input rank (at most 4) and the paddings tensor layout, then
-// resizes the output so each dimension grows by its before/after padding.
-// Throws std::runtime_error when the input has more than 4 dimensions.
-void MirrorPad::configure()
-{
-  const Shape &in_shape = input()->shape();
-  const int rank = in_shape.num_dims();
-
-  if (rank > 4)
-  {
-    throw std::runtime_error("Unsupported number of dimensions.");
-  }
-
-  assert(output()->element_type() == input()->element_type());
-  assert(paddings()->element_type() == DataType::S32);
-  // Paddings shape should be [N, 2].
-  assert(paddings()->shape().num_dims() == 2);
-  assert(paddings()->shape().dim(0) == rank);
-  assert(paddings()->shape().dim(1) == 2);
-
-  // Each row of the paddings tensor holds (before, after) for one dimension.
-  const auto *pad_values = getTensorData<int32_t>(paddings());
-  Shape out_shape(rank);
-  for (int d = 0; d < rank; ++d)
-  {
-    const int32_t before = pad_values[2 * d];
-    const int32_t after = pad_values[2 * d + 1];
-    assert(before >= 0 && after >= 0);
-    out_shape.dim(d) = in_shape.dim(d) + before + after;
-  }
-
-  output()->resize(out_shape);
-}
-
-template <typename T>
-inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
- Tensor &output);
-
-// Runs MirrorPadImpl for the input's element type (FLOAT32 or U8).
-// Throws std::runtime_error for any other element type.
-void MirrorPad::execute() const
-{
-  const auto elem_type = input()->element_type();
-
-  if (elem_type == DataType::FLOAT32)
-  {
-    MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
-    return;
-  }
-
-  if (elem_type == DataType::U8)
-  {
-    // The output zero point has to be representable as uint8_t.
-    assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
-    assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
-
-    MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
-    return;
-  }
-
-  throw std::runtime_error("Unsupported type.");
-}
-
-// Writes `input` into `output` surrounded by mirrored padding.
-//
-// Both tensors are viewed through up to four axes named b, h, w, d (outermost to
-// innermost); axes the input does not have get extent 1. `paddings` is read as
-// consecutive (before, after) int32 pairs, one pair per input axis. `mode` selects
-// the REFLECT branch; every other mode value takes the symmetric branch.
-template <typename T>
-inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
-                          Tensor &output)
-{
-  auto const input_dims = input.shape().num_dims();
-  auto const input_data = input.data<T>();
-  auto const paddings_data = paddings.data<int32_t>();
-  auto const output_data = output.data<T>();
-
-  auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1;
-  auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1;
-  auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1;
-  auto const input_d = input.shape().dim(input_dims - 1);
-
-  auto const input_h_offset = input_d * input_w;
-  auto const input_b_offset = input_h_offset * input_h;
-
-  auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1;
-  auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1;
-  auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1;
-  auto const output_d = output.shape().dim(input_dims - 1);
-
-  // Padding lookup for axis `axis` of the input (`side` 0 = before, 1 = after).
-  // For inputs of rank < 4 the leading b/h/w axes map to negative axis numbers; those
-  // axes do not exist, so they are treated as unpadded instead of indexing in front of
-  // the paddings buffer.
-  const auto padding_at = [paddings_data](int32_t axis, int32_t side) -> int32_t {
-    return axis < 0 ? 0 : paddings_data[2 * axis + side];
-  };
-
-  auto const left_b_pad = padding_at(input_dims - 4, 0);
-  auto const left_h_pad = padding_at(input_dims - 3, 0);
-  auto const left_w_pad = padding_at(input_dims - 2, 0);
-  auto const left_d_pad = padding_at(input_dims - 1, 0);
-
-  auto const right_b_pad = padding_at(input_dims - 4, 1);
-  auto const right_h_pad = padding_at(input_dims - 3, 1);
-  auto const right_w_pad = padding_at(input_dims - 2, 1);
-  auto const right_d_pad = padding_at(input_dims - 1, 1);
-
-  // Modulo that always yields a result in [0, b) for positive b.
-  const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
-  // Flat offset of element (b, h, w, d) inside the input buffer.
-  const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
-                                                                      auto b) {
-    return d + w * input_d + h * input_h_offset + b * input_b_offset;
-  };
-
-  // Maps output coordinate `i` along one axis to the input coordinate used by the
-  // symmetric branch: the index direction flips on every other input-sized period.
-  const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
-    bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
-    return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
-  };
-
-  const T *in_ptr = input_data;
-  T *out_ptr = output_data;
-
-  for (int32_t b = 0; b < output_b; ++b)
-  {
-    for (int32_t h = 0; h < output_h; ++h)
-    {
-      for (int32_t w = 0; w < output_w; ++w)
-      {
-        for (int32_t d = 0; d < output_d; ++d)
-        {
-          if (b < left_b_pad || b >= output_b - right_b_pad || //
-              h < left_h_pad || h >= output_h - right_h_pad || //
-              w < left_w_pad || w >= output_w - right_w_pad || //
-              d < left_d_pad || d >= output_d - right_d_pad)
-          {
-            // Padding region: fetch the source element by computed coordinates.
-            if (mode == MirrorPadMode::REFLECT)
-            {
-              // NOTE(review): this branch wraps the shifted index modulo the input
-              // extent; confirm this matches the intended REFLECT semantics for
-              // extents larger than 2.
-              *out_ptr++ = input_data[offset_index(
-                positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w),
-                positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))];
-            }
-            else
-            {
-              *out_ptr++ = input_data[offset_index(
-                symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
-                symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
-            }
-          }
-          else
-          {
-            // Interior region: both buffers are walked in the same order, so the
-            // next input element is the one that belongs here.
-            *out_ptr++ = *in_ptr++;
-          }
-        }
-      }
-    }
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Mul.h"
-
-#include "kernels/BinaryOpCommon.h"
-#include "kernels/Utils.h"
-
-#include "PALMul.h"
-
-#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Builds the kernel with {input1, input2} as inputs, {output} as the single output,
-// and the given Mul parameters.
-Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
-  : KernelWithParams<MulParams>({input1, input2}, {output}, params)
-{
-}
-
-// Verifies that both inputs and the output share one element type, applies the extra
-// zero-point checks required for S16, and resizes the output to the broadcast shape
-// of the two inputs.
-void Mul::configure()
-{
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
-  if (input1()->element_type() == DataType::S16)
-  {
-    // S16 inputs must carry exactly one zero point each, and all zero points must be 0.
-    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
-                           input2()->zero_points().size() == 1);
-    LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
-                           output()->zero_point() == 0);
-  }
-
-  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-// Selects the evaluation routine from the element type of input1.
-// Throws std::runtime_error for element types without a routine.
-void Mul::execute() const
-{
-  const auto elem_type = input1()->element_type();
-
-  if (elem_type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (elem_type == DataType::S64)
-  {
-    evalInteger<int64_t>();
-  }
-  else if (elem_type == DataType::S32)
-  {
-    evalInteger<int32_t>();
-  }
-  else if (elem_type == DataType::S16)
-  {
-    evalQuantizedS16();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float multiplication: fills the activation range into the arithmetic params, then
-// calls the broadcasting or the plain PAL multiply depending on what
-// ProcessBroadcastShapes reports for the two input shapes.
-void Mul::evalFloat() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<float>(params, _params.activation);
-
-  // ProcessBroadcastShapes receives a pointer to `params` in addition to the shapes.
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    luci_interpreter_pal::BroadcastMul4DSlow(
-      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
-      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
-  }
-  else
-  {
-    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
-                              getTensorShape(input2()), getTensorData<float>(input2()),
-                              getTensorShape(output()), getTensorData<float>(output()));
-  }
-}
-
-// Integer multiplication for element type T: same flow as the float path, with the
-// activation range and tensor data typed as T.
-template <typename T> void Mul::evalInteger() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<T>(params, _params.activation);
-
-  // ProcessBroadcastShapes receives a pointer to `params` in addition to the shapes.
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    luci_interpreter_pal::BroadcastMul4DSlow(
-      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
-      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
-  }
-  else
-  {
-    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
-                              getTensorShape(input2()), getTensorData<T>(input2()),
-                              getTensorShape(output()), getTensorData<T>(output()));
-  }
-}
-
-// S16 quantized multiplication: multiplies raw values in 32 bits, rescales the
-// product by (input1_scale * input2_scale / output_scale), clamps it to the
-// activation range and narrows it back to int16_t.
-void Mul::evalQuantizedS16() const
-{
-  const double real_multiplier = static_cast<double>(input1()->scale()) *
-                                 static_cast<double>(input2()->scale()) /
-                                 static_cast<double>(output()->scale());
-
-  int32_t rescale_multiplier;
-  int rescale_shift;
-  quantizeMultiplier(real_multiplier, &rescale_multiplier, &rescale_shift);
-
-  int32_t act_min{};
-  int32_t act_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &act_min, &act_max);
-
-  auto multiply_one = [rescale_multiplier, rescale_shift, act_min, act_max](int16_t lhs,
-                                                                            int16_t rhs) {
-    const int32_t product = static_cast<int32_t>(lhs) * static_cast<int32_t>(rhs);
-    const int32_t rescaled =
-      tflite::MultiplyByQuantizedMultiplier(product, rescale_multiplier, rescale_shift);
-    // Clamp from below first, then from above.
-    const int32_t clamped = std::min(std::max(rescaled, act_min), act_max);
-    return static_cast<int16_t>(clamped);
-  };
-
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
-                        getTensorShape(input2()), getTensorData<int16_t>(input2()),
-                        getTensorShape(output()), getTensorData<int16_t>(output()),
-                        multiply_one);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Neg.h"
-#include "kernels/Utils.h"
-
-#include "PALNeg.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Builds the kernel with a single input tensor and a single output tensor.
-Neg::Neg(const Tensor *input, Tensor *output)
-  : Kernel({input}, {output})
-{
-}
-
-// Requires matching input/output element types and gives the output the input's shape.
-void Neg::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  const auto &same_as_input = input()->shape();
-  output()->resize(same_as_input);
-}
-
-// Only FLOAT32 is handled; any other element type raises std::runtime_error.
-void Neg::execute() const
-{
-  if (input()->element_type() != DataType::FLOAT32)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  evalFloat();
-}
-
-// Forwards the float input and output buffers (with their shapes) to the PAL Negate.
-void Neg::evalFloat() const
-{
-  const auto in_shape = getTensorShape(input());
-  const auto out_shape = getTensorShape(output());
-
-  luci_interpreter_pal::Negate(in_shape, getTensorData<float>(input()), //
-                               out_shape, getTensorData<float>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/NotEqual.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Builds the kernel with operands {x, y} as inputs and a single output tensor.
-NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output)
-  : Kernel({x, y}, {output})
-{
-}
-
-// Requires x and y to share an element type and the output to be BOOL. For U8
-// operands the per-operand multipliers/shifts are derived from the operand scales.
-// The output is resized to the broadcast shape of x and y.
-void NotEqual::configure()
-{
-  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
-
-  const bool is_u8 = x()->element_type() == DataType::U8;
-  if (is_u8)
-  {
-    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
-    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
-  }
-
-  const auto broadcast_shape = calculateShapeForBroadcast(x()->shape(), y()->shape());
-  output()->resize(broadcast_shape);
-}
-
-// Selects the comparison routine from the element type of x.
-// Throws std::runtime_error for element types without a routine.
-void NotEqual::execute() const
-{
-  const auto elem_type = x()->element_type();
-
-  if (elem_type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (elem_type == DataType::S64)
-  {
-    evalInteger<int64_t>();
-  }
-  else if (elem_type == DataType::S32)
-  {
-    evalInteger<int32_t>();
-  }
-  else if (elem_type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float comparison: uses the broadcasting reference op when the operand shapes
-// differ, and the plain one otherwise.
-void NotEqual::evalFloat() const
-{
-  const auto lhs = getTensorData<float>(x());
-  const auto rhs = getTensorData<float>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::NotEqual(op_params, getTensorShape(x()), lhs, getTensorShape(y()), rhs,
-                                    getTensorShape(output()), result);
-    return;
-  }
-
-  tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), lhs,
-                                                 getTensorShape(y()), rhs,
-                                                 getTensorShape(output()), result);
-}
-
-// Integer comparison for element type T using the "NoScaling" reference ops;
-// the broadcasting variant is chosen when the operand shapes differ.
-template <typename T> void NotEqual::evalInteger() const
-{
-  const auto lhs = getTensorData<T>(x());
-  const auto rhs = getTensorData<T>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), lhs,
-                                             getTensorShape(y()), rhs, getTensorShape(output()),
-                                             result);
-    return;
-  }
-
-  tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), lhs,
-                                                          getTensorShape(y()), rhs,
-                                                          getTensorShape(output()), result);
-}
-
-// U8 comparison using the "WithScaling" reference ops. Offsets are the negated zero
-// points; multipliers and shifts are the values prepared in configure().
-void NotEqual::evalQuantized() const
-{
-  const auto lhs = getTensorData<uint8_t>(x());
-  const auto rhs = getTensorData<uint8_t>(y());
-  auto result = getTensorData<bool>(output());
-
-  tflite::ComparisonParams op_params;
-  op_params.left_shift = 8;
-  // First operand. The offset is the zero point with its sign flipped.
-  op_params.input1_offset = -x()->zero_point();
-  op_params.input1_shift = _x_shift;
-  op_params.input1_multiplier = _x_multiplier;
-  // Second operand, same convention.
-  op_params.input2_offset = -y()->zero_point();
-  op_params.input2_shift = _y_shift;
-  op_params.input2_multiplier = _y_multiplier;
-  op_params.is_broadcast = x()->shape() != y()->shape();
-
-  if (!op_params.is_broadcast)
-  {
-    tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), lhs,
-                                               getTensorShape(y()), rhs,
-                                               getTensorShape(output()), result);
-    return;
-  }
-
-  tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling(
-    op_params, getTensorShape(x()), lhs, getTensorShape(y()), rhs, getTensorShape(output()),
-    result);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/OneHot.h"
-#include "kernels/Utils.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-namespace
-{
-
-// Fills the output with on/off values of type T.
-//
-// The indices are treated as a [prefix x suffix] matrix and the output as a
-// [prefix x depth x suffix] volume, where prefix is the product of the index
-// dimensions before `axis`. Element (i, j, k) of the output is the on value when
-// indices(i, k) == j and the off value otherwise. An `axis` of -1 means "after the
-// last index dimension".
-template <typename T>
-void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
-                       const Tensor *off_value_tensor, int32_t depth, int32_t axis,
-                       Tensor *output_tensor)
-{
-  auto const &input_shape = indices_tensor->shape();
-  if (axis == -1)
-  {
-    axis = input_shape.num_dims();
-  }
-
-  // TODO support other integer input types
-  auto const *indices = getTensorData<int32_t>(indices_tensor);
-  auto const on_value = getTensorData<T>(on_value_tensor)[0];
-  auto const off_value = getTensorData<T>(off_value_tensor)[0];
-  auto *cursor = getTensorData<T>(output_tensor);
-
-  // Number of index elements in front of the one-hot axis.
-  auto prefix_dim_size = 1;
-  for (int32_t d = 0; d < axis; ++d)
-  {
-    prefix_dim_size *= input_shape.dim(d);
-  }
-  assert(prefix_dim_size > 0);
-  // Number of index elements from the one-hot axis onwards.
-  auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
-
-  for (int32_t i = 0; i < prefix_dim_size; ++i)
-  {
-    for (int32_t j = 0; j < depth; ++j)
-    {
-      for (int32_t k = 0; k < suffix_dim_size; ++k)
-      {
-        const bool is_hot = indices[i * suffix_dim_size + k] == j;
-        *cursor = is_hot ? on_value : off_value;
-        ++cursor;
-      }
-    }
-  }
-}
-
-} // namespace
-
-// Builds the kernel with {indices, depth, on_value, off_value} as inputs, {output} as
-// the single output, and the given OneHot parameters.
-OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
-               const Tensor *off_value, Tensor *output, const OneHotParams &params)
-  : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
-{
-  // Do nothing
-}
-
-// Validates element types, scalar-ness of depth/on/off values and the axis range,
-// then resizes the output to the index shape with one extra dimension of size
-// `depth` inserted at the (resolved) axis.
-void OneHot::configure()
-{
-  // Element types
-  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32);
-  LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
-  LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
-  LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
-
-  // Shape dependent parameters
-  LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1);
-  LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
-  LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
-  LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
-
-  auto const &input_shape = indices()->shape();
-  auto const input_dims = input_shape.num_dims();
-  auto const depth_value = getTensorData<int32_t>(depth())[0];
-  // An axis of -1 places the new dimension after all index dimensions.
-  auto const axis = params().axis == -1 ? input_dims : params().axis;
-
-  Shape output_shape(input_shape.num_dims() + 1);
-  for (int32_t d = 0; d < output_shape.num_dims(); ++d)
-  {
-    if (d < axis)
-    {
-      output_shape.dim(d) = input_shape.dim(d);
-    }
-    else if (d == axis)
-    {
-      output_shape.dim(d) = depth_value;
-    }
-    else
-    {
-      // Dimensions behind the inserted axis are shifted by one.
-      output_shape.dim(d) = input_shape.dim(d - 1);
-    }
-  }
-
-  output()->resize(output_shape);
-}
-
-// Reads the depth scalar and the axis parameter, then runs OneHotComputeImpl for the
-// output element type (FLOAT32, U8 or S16). Other types raise std::runtime_error.
-void OneHot::execute() const
-{
-  auto const depth_value = getTensorData<int32_t>(depth())[0];
-  auto const axis = params().axis;
-  auto const out_type = output()->element_type();
-
-  if (out_type == loco::DataType::FLOAT32)
-  {
-    OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output());
-  }
-  else if (out_type == loco::DataType::U8)
-  {
-    OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output());
-  }
-  else if (out_type == loco::DataType::S16)
-  {
-    OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output());
-  }
-  else
-  {
-    // TODO Support other data types
-    throw std::runtime_error("Not supported, yet!");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/PRelu.h"
-
-#include "kernels/BinaryOpCommon.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
-#include <tensorflow/lite/kernels/internal/reference/prelu.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Builds the kernel with {input, alpha} as inputs and a single output tensor.
-PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output)
-  : Kernel({input, alpha}, {output})
-{
-  // Nothing else to set up here.
-}
-
-// Out-of-line destructor, provided so that the vector holding the quantized alpha
-// data is destroyed properly.
-PRelu::~PRelu()
-{
-}
-
-// Validates types and quantization parameters, prepares the fixed-point multipliers
-// for the U8 and S16 paths, and resizes the output to the broadcast shape of input
-// and alpha.
-void PRelu::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1);
-  LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1);
-
-  // Multiplier applied to non-negative inputs: input_scale / output_scale.
-  // Only the quantized branches below invoke it.
-  const auto prepare_identity_multiplier = [this]() {
-    double identity_multiplier = input()->scale() / output()->scale();
-    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
-  };
-
-  const auto elem_type = input()->element_type();
-  if (elem_type == DataType::U8)
-  {
-    LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives
-    // A single multiplier for negative inputs: input_scale * alpha_scale / output_scale.
-    _alpha_multipliers.resize(1);
-    double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
-    quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier,
-                       &_alpha_multipliers[0].shift);
-    prepare_identity_multiplier();
-  }
-  else if (elem_type == DataType::S16)
-  {
-    // All zero points must be 0.
-    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
-    for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel)
-    {
-      LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0);
-    }
-    // Alpha must be quantized along its last dimension, with one scale per element of
-    // that dimension, and must hold as many elements as the input's last dimension.
-    LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1);
-    LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) ==
-                           alpha()->shape().dim(alpha()->quantized_dimension()));
-    LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() ==
-                           input()->shape().dim(input()->shape().num_dims() - 1));
-
-    // Every alpha dimension except the last has to be 1.
-    for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim)
-    {
-      LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1);
-    }
-
-    // One multiplier per alpha scale for negative inputs.
-    const std::vector<double> per_channel_multipliers =
-      getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale());
-    _alpha_multipliers = quantizeMultipliers(per_channel_multipliers);
-
-    prepare_identity_multiplier();
-  }
-
-  output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape()));
-}
-
-// Selects the evaluation routine from the input element type.
-// Throws std::runtime_error for element types without a routine.
-void PRelu::execute() const
-{
-  const auto elem_type = input()->element_type();
-
-  if (elem_type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (elem_type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else if (elem_type == DataType::S16)
-  {
-    evalQuantizedS16();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float PRelu: non-negative inputs pass through, negative inputs are multiplied by
-// alpha. Differing input/alpha shapes go through the broadcasting reference op;
-// identical shapes are handled with a flat loop.
-void PRelu::evalFloat() const
-{
-  const auto in_data = getTensorData<float>(input());
-  const auto slope_data = getTensorData<float>(alpha());
-  const auto flat_size = getTensorShape(input()).FlatSize();
-  auto out_data = getTensorData<float>(output());
-
-  auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? input : input * alpha; };
-
-  const bool same_shape = !(input()->shape() != alpha()->shape());
-  if (same_shape)
-  {
-    for (auto idx = decltype(flat_size){0}; idx < flat_size; ++idx)
-    {
-      const float value = in_data[idx];
-      out_data[idx] = value >= 0 ? value : value * slope_data[idx];
-    }
-    return;
-  }
-
-  tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
-    getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
-    getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
-    PReluFunc);
-}
-
-// U8 PRelu through the TFLite reference ops. Input and alpha offsets are the negated
-// zero points; multiplier/shift pair 1 is the identity pair and pair 2 is the first
-// (only) alpha pair, both prepared in configure().
-void PRelu::evalQuantized() const
-{
-  tflite::PreluParams op_params{};
-
-  // Offsets: input and alpha use the zero point with its sign flipped.
-  op_params.input_offset = -input()->zero_point();
-  op_params.alpha_offset = -alpha()->zero_point();
-  op_params.output_offset = output()->zero_point();
-  // Pair 1: identity multiplier.
-  op_params.output_multiplier_1 = _output_multiplier_identity;
-  op_params.output_shift_1 = _output_shift_identity;
-  // Pair 2: alpha multiplier.
-  op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier;
-  op_params.output_shift_2 = _alpha_multipliers[0].shift;
-
-  const bool same_shape = !(input()->shape() != alpha()->shape());
-  if (same_shape)
-  {
-    tflite::reference_ops::Prelu<uint8_t>(
-      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
-      getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-    return;
-  }
-
-  tflite::reference_ops::BroadcastPrelu4DSlow(
-    op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
-    getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-}
-
-// Computes one S16 PRelu output element.
-// Non-negative inputs are rescaled with `identity_mult`; negative inputs are first
-// multiplied by `alpha_val` and then rescaled with `alpha_mult`. The result is clamped
-// to the int16_t range before being returned.
-static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val,
-                                       const ChannelQuantMultipliers &identity_mult,
-                                       const ChannelQuantMultipliers &alpha_mult)
-{
-  constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min();
-  constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();
-
-  int32_t rescaled;
-  if (input_val >= 0)
-  {
-    rescaled = tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val),
-                                                     identity_mult.multiplier, identity_mult.shift);
-  }
-  else
-  {
-    rescaled = tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val),
-                                                     alpha_mult.multiplier, alpha_mult.shift);
-  }
-
-  const int32_t lower_bounded = std::max(quantized_min, rescaled);
-  const int32_t clamped = std::min(quantized_max, lower_bounded);
-  return clamped;
-}
-
-// S16 PRelu with alpha quantized per channel along the last dimension.
-// The input is walked as [outer x channels]; each element is combined with the alpha
-// value and the negative-side multiplier of its channel.
-void PRelu::evalQuantizedS16() const
-{
-  // Note that this kernel assumes alpha is CWQ
-  tflite::RuntimeShape input_shape = getTensorShape(input());
-  const int16_t *in_data = input()->data<int16_t>();
-  const int16_t *slope_data = alpha()->data<int16_t>();
-  int16_t *out_data = output()->data<int16_t>();
-
-  const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity};
-
-  const int last_dim = input()->shape().num_dims() - 1;
-  const int32_t channels = input_shape.Dims(last_dim);
-
-  // Product of all dimensions in front of the channel dimension.
-  int32_t outer_size = 1;
-  for (int d = 0; d < last_dim; ++d)
-  {
-    outer_size *= input_shape.Dims(d);
-  }
-
-  for (int32_t outer = 0; outer < outer_size; ++outer)
-  {
-    const size_t row_start = static_cast<size_t>(outer) * static_cast<size_t>(channels);
-    for (int32_t ch = 0; ch < channels; ++ch)
-    {
-      const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[ch];
-      const size_t offset = row_start + ch;
-
-      out_data[offset] = evalElemS16PRelu(in_data[offset], slope_data[ch], pos_mult, neg_mult);
-    }
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Pack.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Builds the kernel from the list of tensors to pack, the single output tensor,
-// and the given Pack parameters.
-Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params)
-  : KernelWithParams<PackParams>(std::move(inputs), {output}, params)
-{
-}
-
-// Validates the inputs and resizes the output.
-//
-// All inputs must have the same element type and shape as the first input. The output
-// shape is the input shape with one extra dimension of size `values_count` inserted at
-// the (normalized) axis. For U8/S8/S16 the output and every input must share the first
-// input's zero point and scale.
-// Throws std::runtime_error for unsupported element types or failed checks.
-void Pack::configure()
-{
-  LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count));
-  // The first input is used as the reference tensor below, so there must be one.
-  LUCI_INTERPRETER_CHECK(!_inputs.empty());
-  const Tensor *t0 = _inputs[0];
-  const int dimension_size = t0->shape().num_dims() + 1;
-  int axis = params().axis;
-  if (axis < 0)
-  {
-    // Negative axes count from the end of the output shape.
-    axis += dimension_size;
-  }
-  LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims());
-
-  if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 &&
-      t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 &&
-      t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  for (uint32_t i = 1; i < _inputs.size(); ++i)
-  {
-    const Tensor *tensor = _inputs[i];
-    LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
-    LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
-    for (int d = 0; d < t0->shape().num_dims(); ++d)
-    {
-      LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
-    }
-  }
-
-  Shape output_shape(dimension_size);
-  // Next dimension of the first input that has not been copied to the output yet.
-  int src_dim = 0;
-  for (int index = 0; index < dimension_size; ++index)
-  {
-    if (index == axis)
-    {
-      output_shape.dim(index) = params().values_count;
-    }
-    else
-    {
-      output_shape.dim(index) = t0->shape().dim(src_dim++);
-    }
-  }
-
-  if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 ||
-      t0->element_type() == DataType::S16)
-  {
-    LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
-    LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
-    // Guarantee input/output quantization params match as we do not support
-    // packing quantized tensors.
-    for (int i = 0; i < params().values_count; i++)
-    {
-      LUCI_INTERPRETER_CHECK(_inputs[i]->zero_point() == t0->zero_point());
-      LUCI_INTERPRETER_CHECK(_inputs[i]->scale() == t0->scale());
-    }
-  }
-
-  output()->resize(output_shape);
-}
-
-// Runs evalGeneric for the element type of the first input.
-// Throws std::runtime_error for element types without an instantiation.
-void Pack::execute() const
-{
-  const auto elem_type = _inputs[0]->element_type();
-
-  if (elem_type == DataType::FLOAT32)
-  {
-    evalGeneric<float>();
-  }
-  else if (elem_type == DataType::U8)
-  {
-    evalGeneric<uint8_t>();
-  }
-  else if (elem_type == DataType::S8)
-  {
-    evalGeneric<int8_t>();
-  }
-  else if (elem_type == DataType::S16)
-  {
-    evalGeneric<int16_t>();
-  }
-  else if (elem_type == DataType::S32)
-  {
-    evalGeneric<int32_t>();
-  }
-  else if (elem_type == DataType::S64)
-  {
-    evalGeneric<int64_t>();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Packs all inputs of element type T into the output with the TFLite reference Pack.
-// A negative axis parameter is normalized against the output rank (input rank + 1).
-template <typename T> void Pack::evalGeneric() const
-{
-  const int output_rank = _inputs[0]->shape().num_dims() + 1;
-
-  int axis = params().axis;
-  if (axis < 0)
-  {
-    axis += output_rank;
-  }
-
-  tflite::PackParams pack_params{};
-  pack_params.axis = axis;
-  pack_params.inputs_count = _inputs.size();
-
-  VectorOfTensors<T, true> inputs(_inputs);
-  tflite::reference_ops::Pack<T>(pack_params, inputs.shapes(), inputs.data(),
-                                 getTensorShape(output()), getTensorData<T>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Pad.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/pad.h>
-
-#include <limits>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output)
- : Kernel({input, paddings}, {output})
-{
-}
-
-void Pad::configure()
-{
- const Shape &input_shape = input()->shape();
- const int num_dims = input_shape.num_dims();
-
- if (num_dims > 4)
- throw std::runtime_error("Unsupported number of dimensions.");
-
- assert(output()->element_type() == input()->element_type());
- assert(paddings()->element_type() == DataType::S32);
- // Paddings shape should be [N, 2].
- assert(paddings()->shape().num_dims() == 2);
- assert(paddings()->shape().dim(0) == num_dims);
- assert(paddings()->shape().dim(1) == 2);
-
- Shape output_shape(num_dims);
- const auto *paddings_data = getTensorData<int32_t>(paddings());
- for (int i = 0; i < num_dims; ++i)
- {
- const int32_t padding_before = paddings_data[i * 2];
- const int32_t padding_after = paddings_data[i * 2 + 1];
- assert(padding_before >= 0 && padding_after >= 0);
- output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
- }
-
- output()->resize(output_shape);
-}
-
-void Pad::execute() const
-{
- const int num_dims = input()->shape().num_dims();
-
- tflite::PadParams params{};
- params.left_padding_count = num_dims;
- params.right_padding_count = num_dims;
-
- const auto *paddings_data = getTensorData<int32_t>(paddings());
- for (int i = num_dims - 1; i >= 0; --i)
- {
- params.left_padding[i] = paddings_data[i * 2];
- params.right_padding[i] = paddings_data[i * 2 + 1];
- }
-
- switch (input()->element_type())
- {
- case DataType::FLOAT32:
- {
- const float pad_value = 0.0f;
- tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
- &pad_value, getTensorShape(output()),
- getTensorData<float>(output()));
- break;
- }
- case DataType::U8:
- {
- assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
- assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
- const auto pad_value = static_cast<uint8_t>(output()->zero_point());
- tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- &pad_value, getTensorShape(output()),
- getTensorData<uint8_t>(output()));
- break;
- }
- case DataType::S8:
- {
- assert(output()->zero_point() >= std::numeric_limits<int8_t>::min());
- assert(output()->zero_point() <= std::numeric_limits<int8_t>::max());
- const auto pad_value = static_cast<int8_t>(output()->zero_point());
- tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()),
- &pad_value, getTensorShape(output()),
- getTensorData<int8_t>(output()));
- break;
- }
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/PadV2.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/pad.h>
-
-#include <limits>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-PadV2::PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values,
- Tensor *output)
- : Kernel({input, paddings, constant_values}, {output})
-{
-}
-
-void PadV2::configure()
-{
- const Shape &input_shape = input()->shape();
- const int num_dims = input_shape.num_dims();
-
- if (num_dims > 4)
- throw std::runtime_error("Unsupported number of dimensions.");
-
- assert(output()->element_type() == input()->element_type());
- assert(paddings()->element_type() == DataType::S32);
- assert(constant_values()->element_type() == output()->element_type());
- // Paddings shape should be [N, 2].
- assert(paddings()->shape().num_dims() == 2);
- assert(paddings()->shape().dim(0) == num_dims);
- assert(paddings()->shape().dim(1) == 2);
- // Constant values elements number should be 1.
- assert(constant_values()->shape().num_elements() == 1);
-
- Shape output_shape(num_dims);
- const auto *paddings_data = getTensorData<int32_t>(paddings());
- for (int i = 0; i < num_dims; ++i)
- {
- const int32_t padding_before = paddings_data[i * 2];
- const int32_t padding_after = paddings_data[i * 2 + 1];
- assert(padding_before >= 0 && padding_after >= 0);
- output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
- }
-
- output()->resize(output_shape);
-}
-
-void PadV2::execute() const
-{
- const int num_dims = input()->shape().num_dims();
-
- tflite::PadParams params{};
- params.left_padding_count = num_dims;
- params.right_padding_count = num_dims;
-
- const auto *paddings_data = getTensorData<int32_t>(paddings());
- for (int i = num_dims - 1; i >= 0; --i)
- {
- params.left_padding[i] = paddings_data[i * 2];
- params.right_padding[i] = paddings_data[i * 2 + 1];
- }
-
- switch (input()->element_type())
- {
- case DataType::FLOAT32:
- {
- const auto pad_value = getTensorData<float>(constant_values())[0];
- tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
- &pad_value, getTensorShape(output()),
- getTensorData<float>(output()));
- break;
- }
- case DataType::U8:
- {
- assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
- assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
- const auto pad_value = getTensorData<uint8_t>(constant_values())[0];
- tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- &pad_value, getTensorShape(output()),
- getTensorData<uint8_t>(output()));
- break;
- }
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Pow.h"
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output)
- : Kernel({input1, input2}, {output})
-{
-}
-
-void Pow::configure()
-{
- LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
- LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
-
- output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
-}
-
-void Pow::execute() const
-{
- switch (input1()->element_type())
- {
- case DataType::FLOAT32:
- eval<float>();
- break;
- case DataType::S32:
- eval<int32_t>();
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-template <typename T> void Pow::eval() const
-{
- tflite::ArithmeticParams params{};
-
- const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
- getTensorShape(input1()), getTensorShape(input2()), ¶ms);
-
- if (need_broadcast)
- {
- tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()),
- getTensorShape(input2()), getTensorData<T>(input2()),
- getTensorShape(output()), getTensorData<T>(output()));
- }
- else
- {
- tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()),
- getTensorShape(input2()), getTensorData<T>(input2()),
- getTensorShape(output()), getTensorData<T>(output()));
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Quantize.h"
-#include "kernels/Utils.h"
-#include "PALQuantize.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-namespace
-{
-
-template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
-{
- int32_t multiplier;
- int shift;
-
- const double effective_output_scale = input->scale() / output->scale();
- quantizeMultiplier(effective_output_scale, &multiplier, &shift);
-
- const auto input_shape = getTensorShape(input);
- const auto output_shape = getTensorShape(output);
- const auto size = tflite::MatchingFlatSize(input_shape, output_shape);
-
- const auto input_data = getTensorData<input_dtype>(input);
-
- switch (output->element_type())
- {
- case loco::DataType::S8:
- luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
- output->zero_point(), getTensorData<int8_t>(output));
- break;
- case loco::DataType::U8:
- luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
- output->zero_point(), getTensorData<uint8_t>(output));
- break;
- case loco::DataType::S16:
- luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
- output->zero_point(), getTensorData<int16_t>(output));
- break;
- default:
- throw std::runtime_error("Unsupported quantized type, yet!");
- }
-}
-
-} // namespace
-
-Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Quantize::configure()
-{
-
- if (input()->element_type() == loco::DataType::S16)
- LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
-
- switch (input()->element_type())
- {
- case loco::DataType::FLOAT32:
- {
- LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 ||
- output()->element_type() == loco::DataType::S8 ||
- output()->element_type() == loco::DataType::S16);
- break;
- }
- case loco::DataType::S16:
- case loco::DataType::S8:
- case loco::DataType::U8:
- {
- LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 ||
- output()->element_type() == loco::DataType::U8 ||
- output()->element_type() == loco::DataType::S16);
- if (output()->element_type() == loco::DataType::S16)
- {
- LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
- }
- break;
- }
- default:
- throw std::runtime_error("Unsupported type");
- }
-
- output()->resize(input()->shape());
-}
-
-void Quantize::execute() const
-{
- switch (input()->element_type())
- {
- case loco::DataType::FLOAT32:
- {
- tflite::QuantizationParams op_params;
- op_params.zero_point = output()->zero_point();
- op_params.scale = output()->scale();
- const auto input_data = getTensorData<float>(input());
-
- switch (output()->element_type())
- {
- case loco::DataType::S8:
- {
- luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
- getTensorShape(output()), getTensorData<int8_t>(output()));
- break;
- }
- case loco::DataType::U8:
- {
- luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
- getTensorShape(output()),
- getTensorData<uint8_t>(output()));
- break;
- }
- case loco::DataType::S16:
- {
- luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
- getTensorShape(output()),
- getTensorData<int16_t>(output()));
- break;
- }
- default:
- throw std::runtime_error("Unsupported type.");
- }
- break;
- }
- case loco::DataType::S16:
- {
- call_requantize<int16_t>(input(), output());
- break;
- }
- case loco::DataType::S8:
- {
- call_requantize<int8_t>(input(), output());
- break;
- }
- case loco::DataType::U8:
- {
- call_requantize<uint8_t>(input(), output());
- break;
- }
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Relu.h"
-#include "kernels/Utils.h"
-
-#include "PALRelu.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Relu::configure()
-{
- LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
- if (input()->element_type() == DataType::S16)
- {
- LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
- }
-
- if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
- {
- double multiplier = input()->scale() / output()->scale();
- quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
- }
- output()->resize(input()->shape());
-}
-
-void Relu::execute() const
-{
- switch (input()->element_type())
- {
- case DataType::FLOAT32:
- evalFloat();
- break;
- case DataType::U8:
- evalQuantized();
- break;
- case DataType::S16:
- evalQuantizedS16();
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-void Relu::evalFloat() const
-{
- const auto input_data = getTensorData<float>(input());
- const auto input_shape = getTensorShape(input());
- auto output_data = getTensorData<float>(output());
- auto output_shape = getTensorShape(output());
-
- luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data);
-}
-
-void Relu::evalQuantized() const
-{
- tflite::ReluParams params;
- params.input_offset = input()->zero_point();
- params.output_offset = output()->zero_point();
- params.output_multiplier = _output_multiplier;
- params.output_shift = _output_shift;
-
- params.quantized_activation_min =
- std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
- params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
-
- luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
-}
-
-void Relu::evalQuantizedS16() const
-{
- const auto *input_data = getTensorData<int16_t>(input());
- auto *output_data = getTensorData<int16_t>(output());
-
- constexpr int32_t output_min = 0;
- constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
-
- const int32_t num_elements = input()->shape().num_elements();
-
- for (int32_t i = 0; i < num_elements; ++i)
- {
- const int32_t input_val = input_data[i];
- int32_t output_val =
- tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift);
- output_val = std::max(output_val, output_min);
- output_val = std::min(output_val, output_max);
- output_data[i] = static_cast<int16_t>(output_val);
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Relu6.h"
-#include "kernels/Utils.h"
-
-#include "PALRelu6.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
-
-void Relu6::configure()
-{
- LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
- if (input()->element_type() == DataType::U8)
- {
- double multiplier = input()->scale() / output()->scale();
- quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
- }
- output()->resize(input()->shape());
-}
-
-void Relu6::execute() const
-{
- switch (input()->element_type())
- {
- case DataType::FLOAT32:
- evalFloat();
- break;
- case DataType::U8:
- evalQuantized();
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-void Relu6::evalFloat() const
-{
- const auto input_data = getTensorData<float>(input());
- const auto input_shape = getTensorShape(input());
- auto output_data = getTensorData<float>(output());
- auto output_shape = getTensorShape(output());
-
- luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data);
-}
-
-void Relu6::evalQuantized() const
-{
- tflite::ReluParams params;
- params.input_offset = input()->zero_point();
- params.output_offset = output()->zero_point();
- params.output_multiplier = _output_multiplier;
- params.output_shift = _output_shift;
-
- params.quantized_activation_min =
- std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
- params.quantized_activation_max =
- std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
- params.output_offset + static_cast<int32>(roundf(6.f / output()->scale())));
-
- luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Reshape.h"
-
-#include <cassert>
-#include <cstring>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-static Shape extractShapeFromTensor(const Tensor *tensor)
-{
- assert(tensor->element_type() == DataType::S32);
- Shape shape(tensor->shape().num_elements());
- const auto *shape_data = tensor->data<int32_t>();
- for (int i = 0; i < tensor->shape().num_elements(); ++i)
- {
- shape.dim(i) = shape_data[i];
- }
- return shape;
-}
-
-static void resolveUnknownDimension(const Shape &input_shape, Shape *output_shape)
-{
- const int32_t num_input_elements = input_shape.num_elements();
- int32_t num_output_elements = 1;
- int unknown_dim_index = -1;
- for (int i = 0; i < output_shape->num_dims(); ++i)
- {
- const int32_t value = output_shape->dim(i);
- if (value == -1)
- {
- assert(unknown_dim_index == -1);
- unknown_dim_index = i;
- }
- else
- {
- num_output_elements *= value;
- }
- }
- if (unknown_dim_index != -1)
- {
- output_shape->dim(unknown_dim_index) = num_input_elements / num_output_elements;
- num_output_elements *= output_shape->dim(unknown_dim_index);
- }
- assert(num_output_elements == num_input_elements);
-}
-
-Reshape::Reshape(const Tensor *input, const Tensor *shape, Tensor *output)
- : Kernel({input, shape}, {output})
-{
-}
-
-void Reshape::configure()
-{
- Shape output_shape = extractShapeFromTensor(shape());
- resolveUnknownDimension(input()->shape(), &output_shape);
- output()->resize(output_shape);
-}
-
-void Reshape::execute() const
-{
- const auto *input_data = input()->data<void>();
- auto *output_data = output()->data<void>();
-
- const size_t element_size = getDataTypeSize(input()->element_type());
- const int32_t num_elements = input()->shape().num_elements();
- std::memcpy(output_data, input_data, num_elements * element_size);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_RESHAPE_H
-#define LUCI_INTERPRETER_KERNELS_RESHAPE_H
-
-#include "core/Kernel.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-class Reshape : public Kernel
-{
-public:
- Reshape(const Tensor *input, const Tensor *shape, Tensor *output);
-
- const Tensor *input() const { return _inputs[0]; }
- const Tensor *shape() const { return _inputs[1]; }
- Tensor *output() const { return _outputs[0]; }
-
- void configure() override;
- void execute() const override;
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_RESHAPE_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Reshape.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-class ReshapeTest : public ::testing::Test
-{
-protected:
- void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
-
- std::unique_ptr<IMemoryManager> _memory_manager;
-};
-
-// TODO Test types other than FLOAT32.
-
-TEST_F(ReshapeTest, Regular)
-{
- Shape input_shape{1, 2, 2, 3};
- std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
- Shape shape_shape{2};
- std::vector<int32_t> shape_data{3, 4};
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
- Tensor shape_tensor =
- makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
-}
-
-TEST_F(ReshapeTest, UnknownDimension)
-{
- Shape input_shape{2, 1, 2, 3};
- std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
- Shape shape_shape{3};
- std::vector<int32_t> shape_data{2, -1, 2};
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
- Tensor shape_tensor =
- makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
-
- Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
- kernel.configure();
- _memory_manager->allocate_memory(output_tensor);
- kernel.execute();
-
- EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ResizeBilinear.h"
-
-#include "kernels/Utils.h"
-
-#include "PALResizeBilinear.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
- const ResizeBilinearParams ¶ms)
- : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
-{
-}
-
-void ResizeBilinear::configure()
-{
- LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
- LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
- LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
- if (params().half_pixel_centers && params().align_corners)
- throw std::runtime_error("If half_pixel_centers is True, align_corners must be False.");
- LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
- Shape output_shape(4);
- output_shape.dim(0) = input()->shape().dim(0);
- output_shape.dim(1) = getTensorData<int32_t>(size())[0];
- output_shape.dim(2) = getTensorData<int32_t>(size())[1];
- output_shape.dim(3) = input()->shape().dim(3);
- output()->resize(output_shape);
-}
-
-void ResizeBilinear::execute() const
-{
- tflite::ResizeBilinearParams op_params{};
- op_params.align_corners = params().align_corners;
- op_params.half_pixel_centers = params().half_pixel_centers;
- switch (output()->element_type())
- {
- case DataType::FLOAT32:
- luci_interpreter_pal::ResizeBilinear(
- op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
- getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
- break;
- case DataType::U8:
- luci_interpreter_pal::ResizeBilinear(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
- getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ResizeNearestNeighbor.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
-#include "PALResizeNearestNeighbor.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
- Tensor *output,
- const ResizeNearestNeighborParams ¶ms)
- : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
-{
-}
-
-void ResizeNearestNeighbor::configure()
-{
- LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
- LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
- LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
- LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
- Shape output_shape(4);
- output_shape.dim(0) = input()->shape().dim(0);
- output_shape.dim(1) = getTensorData<int32_t>(size())[0];
- output_shape.dim(2) = getTensorData<int32_t>(size())[1];
- output_shape.dim(3) = input()->shape().dim(3);
- output()->resize(output_shape);
-}
-
-void ResizeNearestNeighbor::execute() const
-{
- tflite::ResizeNearestNeighborParams op_params{};
- op_params.align_corners = params().align_corners;
- op_params.half_pixel_centers = params().half_pixel_centers;
- switch (output()->element_type())
- {
- case DataType::FLOAT32:
- tflite::reference_ops::ResizeNearestNeighbor(
- op_params, getTensorShape(input()), getTensorData<int32_t>(input()), getTensorShape(size()),
- getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output()));
- break;
- case DataType::U8:
- luci_interpreter_pal::ResizeNearestNeighbor(
- op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
- getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
- break;
- default:
- throw std::runtime_error("Unsupported type.");
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/ReverseV2.h"
-#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output)
- : Kernel({input, axes}, {output})
-{
-}
-
-// Validates the operands and gives the output the same shape as the input.
-//
-// Every violation raises std::runtime_error:
-//  - axes must be a rank-1 S32 tensor holding exactly one axis;
-//  - the axis value must lie in [0, input rank);
-//  - input must be S32, FLOAT32, U8, S16 or S64 and output must have the same element type.
-//
-// The checks are real runtime checks rather than assert()s so that they also hold in NDEBUG
-// builds; in particular an empty axes tensor is rejected before its first element is read.
-void ReverseV2::configure()
-{
-  LUCI_INTERPRETER_CHECK(axes()->shape().num_dims() == 1);
-
-  switch (input()->element_type())
-  {
-    case DataType::S32:
-    case DataType::FLOAT32:
-    case DataType::U8:
-    case DataType::S16:
-    case DataType::S64:
-      break;
-    default:
-      throw std::runtime_error("Unsupported input type.");
-  }
-  if (axes()->element_type() != DataType::S32)
-  {
-    throw std::runtime_error("Unsupported axes type.");
-  }
-
-  const auto num_axes = axes()->shape().num_elements();
-  if (num_axes > 1)
-  {
-    throw std::runtime_error("Current implementation does not support more than 1 axis.");
-  }
-  // Exactly one axis is required: element 0 is read unconditionally below and in execute().
-  LUCI_INTERPRETER_CHECK(num_axes == 1);
-
-  const int axis_value = getTensorData<int32_t>(axes())[0];
-  if (axis_value < 0 || axis_value >= input()->shape().num_dims())
-  {
-    throw std::runtime_error("Invalid axes value");
-  }
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  output()->resize(input()->shape());
-}
-
-// Reverses input() along the single axis stored in axes() and writes the result to output().
-//
-// Handles every element type that configure() accepts (FLOAT32, U8, S16, S32, S64); any other
-// output type raises std::runtime_error.
-void ReverseV2::execute() const
-{
-  const int axis_value = getTensorData<int32_t>(axes())[0];
-  switch (output()->element_type())
-  {
-    case DataType::FLOAT32:
-      tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
-                                            getTensorData<float>(input()), getTensorShape(output()),
-                                            getTensorData<float>(output()));
-      break;
-    case DataType::U8:
-      tflite::reference_ops::Reverse<uint8_t>(
-        axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
-        getTensorShape(output()), getTensorData<uint8_t>(output()));
-      break;
-    case DataType::S16:
-      tflite::reference_ops::Reverse<int16_t>(
-        axis_value, getTensorShape(input()), getTensorData<int16_t>(input()),
-        getTensorShape(output()), getTensorData<int16_t>(output()));
-      break;
-    case DataType::S32:
-      tflite::reference_ops::Reverse<int32_t>(
-        axis_value, getTensorShape(input()), getTensorData<int32_t>(input()),
-        getTensorShape(output()), getTensorData<int32_t>(output()));
-      break;
-    case DataType::S64:
-      tflite::reference_ops::Reverse<int64_t>(
-        axis_value, getTensorShape(input()), getTensorData<int64_t>(input()),
-        getTensorShape(output()), getTensorData<int64_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported output type");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Rsqrt.h"
-#include "kernels/Utils.h"
-
-#include <stdexcept>
-#include <cmath>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Registers `input` as the only kernel input and `output` as the only kernel output.
-Rsqrt::Rsqrt(const Tensor *input, Tensor *output)
-  : Kernel({input}, {output})
-{
-}
-
-// Requires input and output to share an element type (std::runtime_error otherwise) and
-// resizes the output to the input shape.
-void Rsqrt::configure()
-{
-  const bool same_type = input()->element_type() == output()->element_type();
-  if (!same_type)
-  {
-    throw std::runtime_error("Input/output tensor data type mismatch.");
-  }
-
-  output()->resize(input()->shape());
-}
-
-// Dispatches on the input element type; only FLOAT32 is implemented, anything else raises
-// std::runtime_error.
-void Rsqrt::execute() const
-{
-  if (input()->element_type() != DataType::FLOAT32)
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  evalFloat();
-}
-
-// Writes 1 / sqrt(x) for every float element x of the input into the matching output slot.
-void Rsqrt::evalFloat() const
-{
-  const float *src = getTensorData<float>(input());
-  float *dst = getTensorData<float>(output());
-  const int count = getTensorShape(input()).FlatSize();
-
-  for (int idx = 0; idx < count; ++idx)
-  {
-    dst[idx] = 1.f / std::sqrt(src[idx]);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/SVDF.h"
-#include "kernels/Utils.h"
-#include "PALSVDF.h"
-
-#include <tensorflow/lite/kernels/internal/quantization_util.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-namespace
-{
-// Translates a luci fused-activation enumerator into the corresponding TfLiteFusedActivation
-// value. Enumerators without a mapping raise std::runtime_error.
-TfLiteFusedActivation get_tflite_activation(Activation activation)
-{
-  if (activation == luci::FusedActFunc::RELU)
-    return kTfLiteActRelu;
-  if (activation == luci::FusedActFunc::RELU6)
-    return kTfLiteActRelu6;
-  if (activation == luci::FusedActFunc::RELU_N1_TO_1)
-    return kTfLiteActReluN1To1;
-  if (activation == luci::FusedActFunc::TANH)
-    return kTfLiteActTanh;
-  if (activation == luci::FusedActFunc::SIGN_BIT)
-    return kTfLiteActSignBit;
-  if (activation == luci::FusedActFunc::NONE)
-    return kTfLiteActNone;
-
-  throw std::runtime_error("Unsupported activation type");
-}
-} // namespace
-
-// Builds the SVDF kernel.
-//
-// Kernel inputs, in order: input, weight_feature, weight_time, bias, input_activation_state.
-// Kernel outputs, in order: output, scratchpad_activation_state, scratchpad_1 .. scratchpad_6.
-// `params` is forwarded unchanged to KernelWithParams.
-SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
-           const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
-           Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
-           Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
-           const SVDFParams &params)
-  : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
-                                 {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
-                                  scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
-                                 params)
-{
-  // Do nothing
-}
-
-// Validates element types and shapes of all SVDF operands, then sizes the output, the
-// activation-state scratch tensor and the remaining scratchpads.
-//
-// Supported type combinations:
-//  - S8 input: S8 weight_feature, S16/S8 weight_time and activation state, optional S32 bias,
-//    S8 output, RELU activation only;
-//  - FLOAT32 input with FLOAT32 weights, state, optional bias and output.
-// A FLOAT32 input with U8/S8 weight_feature (hybrid) and every other combination raise
-// std::runtime_error, as does any failed LUCI_INTERPRETER_CHECK.
-void SVDF::configure()
-{
-  const Shape &input_shape = input()->shape();
-  const Shape &weight_features_shape = weight_feature()->shape();
-  const Shape &weight_time_shape = weight_time()->shape();
-
-  // Validate Input Tensor:
-  LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 ||
-                         input()->element_type() == loco::DataType::S8);
-  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
-
-  // Validate inputs and output types
-  if (input()->element_type() == loco::DataType::S8)
-  {
-    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8);
-    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 ||
-                           weight_time()->element_type() == loco::DataType::S8);
-    if (bias())
-      LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32);
-
-    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 ||
-                           input_activation_state()->element_type() == loco::DataType::S8);
-    LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8);
-
-    // Note: now tflite support only ReLU activation for integer SVDF
-    LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU);
-  }
-  else if (weight_feature()->element_type() == loco::DataType::FLOAT32)
-  {
-    // weight_feature is FLOAT32 by the branch condition; check the remaining operands.
-    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32);
-    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32);
-    if (bias())
-      LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32);
-    LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
-  }
-  else if ((weight_feature()->element_type() == loco::DataType::U8 ||
-            weight_feature()->element_type() == loco::DataType::S8) &&
-           input()->element_type() == loco::DataType::FLOAT32)
-  {
-    // TODO:: support hybrid SVDF op
-    throw std::runtime_error("Hybrid type is not currently supported");
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  // Ranks must be validated before any dimension of the weight tensors is read.
-  LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
-  LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
-
-  // Check all the parameters of tensor match within themselves and match the
-  // input configuration.
-  const int rank = params().svdf_rank;
-  const int batch_size = input_shape.dim(0);
-  const int num_filters = weight_features_shape.dim(0);
-  LUCI_INTERPRETER_CHECK(rank != 0);
-  LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
-
-  const int num_units = num_filters / rank;
-  const int memory_size = weight_time_shape.dim(1);
-
-  // Validate Weight_Feature Input Tensor:
-  LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
-
-  // Validate Weight_Time Input Tensor:
-  LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
-
-  // Validate Bias
-  if (bias())
-  {
-    LUCI_INTERPRETER_CHECK(bias()->shape().num_dims() >= 1);
-    LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
-  }
-
-  // Validate Input Activation State
-  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
-  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
-  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
-
-  // Resize scratchpad_state to input_activation_state
-  auto scratchpad_activation_state = getOutputTensors()[1];
-  scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
-
-  // Resize output tensor
-  output()->resize({batch_size, num_units});
-
-  luci_interpreter_pal::SetupScratchpadTensor(
-    input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
-    getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
-    getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
-}
-
-// Chooses the evaluation routine from the weight_feature element type: FLOAT32 runs
-// evalFloat(), S8 together with an S8 input runs evalInteger(). S8 weights with any other
-// input type (hybrid) and all remaining weight types raise std::runtime_error.
-void SVDF::execute() const
-{
-  const auto weight_type = weight_feature()->element_type();
-
-  if (weight_type == loco::DataType::FLOAT32)
-  {
-    evalFloat();
-    return;
-  }
-
-  if (weight_type != loco::DataType::S8)
-  {
-    throw std::runtime_error("Unsupported type");
-  }
-
-  if (input()->element_type() != loco::DataType::S8)
-  {
-    // TODO:: support hybrid SVDF op
-    throw std::runtime_error("Hybrid type is not currently supported");
-  }
-
-  evalInteger();
-}
-
-// Quantized SVDF evaluation: derives two fixed-point rescale factors from the tensor scales,
-// zero-fills the activation-state scratch tensor and calls luci_interpreter_pal::IntegerSVDF.
-// NOTE(review): weight_time and the activation state are read as int16_t here although
-// configure() also admits S8 for both -- confirm S8 is really supported on this path.
-void SVDF::evalInteger() const
-{
- // Scale taking input * weight_feature products into the activation-state domain.
- const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
- input_activation_state()->scale());
- // Scale taking activation-state * weight_time products into the output domain.
- const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
- weight_time()->scale() / output()->scale());
-
- // Each scale is split by tflite::QuantizeMultiplier into an int32 multiplier (_a) and a
- // shift (_b).
- int32_t effective_scale_1_a;
- int effective_scale_1_b;
- int32_t effective_scale_2_a;
- int effective_scale_2_b;
-
- tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
- tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
-
- TfLiteSVDFParams params_svdf{};
- params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
- params_svdf.rank = params().svdf_rank;
- params_svdf.activation = get_tflite_activation(params().activation);
-
- // Output slot 1 is the activation-state scratch tensor.
- auto scratchpad_activation_state = getOutputTensors()[1];
- // Note: it is expected that activation_state input variable tensor reset to zero,
- // also expected that this variable tensor doesn't have buffer
- auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
- std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
-
- // Output slots 2 and 3 are passed to the PAL kernel as int32 work buffers.
- auto scratchpad = getOutputTensors()[2];
- auto output_temp = getOutputTensors()[3];
-
- int32_t input_zp = input()->zero_point();
- int32_t output_zp = output()->zero_point();
- luci_interpreter_pal::IntegerSVDF(
- params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
- getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
- getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
- getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
- getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
- getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
- effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
-}
-
-// Float SVDF evaluation: zero-fills the activation-state scratch tensor (output slot 1) and
-// runs luci_interpreter_pal::FloatSVDF with output slot 2 as the float work buffer.
-void SVDF::evalFloat() const
-{
-  // Note: it is expected that activation_state input variable tensor reset to zero,
-  // also expected that this variable tensor doesn't have buffer
-  auto state_tensor = getOutputTensors()[1];
-  auto state_data = getTensorData<float>(state_tensor);
-  std::fill_n(state_data, state_tensor->shape().num_elements(), 0);
-
-  auto work_tensor = getOutputTensors()[2];
-
-  TfLiteSVDFParams svdf_params{};
-  svdf_params.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
-  svdf_params.rank = params().svdf_rank;
-  svdf_params.activation = get_tflite_activation(params().activation);
-
-  luci_interpreter_pal::FloatSVDF(
-    svdf_params, getTensorShape(input()), getTensorData<float>(input()),
-    getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
-    getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
-    getTensorData<float>(bias()), getTensorData<float>(work_tensor), state_data,
-    getTensorShape(output()), getTensorData<float>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Shape.h"
-#include "kernels/Utils.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers `input` / `output` as the only kernel input / output and stores `params`.
-ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params)
-  : KernelWithParams<ShapeParams>({input}, {output}, params)
-{
-}
-
-// Requires an S32 or S64 output and resizes it to a rank-1 tensor with one element per input
-// dimension.
-void ShapeKernel::configure()
-{
-  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or
-                         output()->element_type() == DataType::S64);
-
-  Shape result_shape(1);
-  result_shape.dim(0) = input()->shape().num_dims();
-  output()->resize(result_shape);
-}
-
-// Writes the input dimensions using the integer width selected by params().out_type
-// (S32 or S64); other values raise std::runtime_error.
-// NOTE(review): configure() validates output()->element_type() while the dispatch here uses
-// params().out_type -- presumably the two always agree; confirm against the loader.
-void ShapeKernel::execute() const
-{
-  const auto requested_type = params().out_type;
-  if (requested_type == DataType::S32)
-  {
-    evalInt<int32_t>();
-  }
-  else if (requested_type == DataType::S64)
-  {
-    evalInt<int64_t>();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Copies every dimension of the input shape into the output buffer, stored as T.
-template <typename T> void ShapeKernel::evalInt() const
-{
-  const auto dims = input()->shape();
-  T *dst = getTensorData<T>(output());
-
-  const int rank = dims.num_dims();
-  for (int axis = 0; axis < rank; ++axis)
-  {
-    dst[axis] = dims.dim(axis);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Slice.h"
-#include "Utils.h"
-#include "PALSlice.h"
-
-#include <cassert>
-#include <cstring>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-// Highest input rank the Slice kernel handles; lower-rank inputs are padded up to this rank
-// in Slice::execute().
-const int max_dim = 4;
-
-// Registers `input`, `begin` and `size` as the kernel inputs and `output` as its only output.
-Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
-  : Kernel({input, begin, size}, {output})
-{
-  // Nothing else to initialise.
-}
-
-// Computes the shape of the slice described by `begin` and `size` (both holding elements of
-// type T, one per input dimension).
-//
-// A size of -1 means "everything from begin to the end of that dimension". Raises
-// std::runtime_error when a size is below -1, when begin lies outside [0, dim], or when
-// begin + size exceeds the dimension.
-template <typename T>
-Shape calculateOutputShape(const Tensor *input, const Tensor *begin, const Tensor *size)
-{
-  const Shape &input_shape = input->shape();
-  const T *begin_data = getTensorData<T>(begin);
-  const T *size_data = getTensorData<T>(size);
-
-  const int num_dims = input_shape.num_dims();
-  Shape output_shape = Shape(num_dims);
-  for (int idx = 0; idx < num_dims; idx++)
-  {
-    const T dim_value = input_shape.dim(idx);
-    const T begin_value = begin_data[idx];
-    T size_value = size_data[idx];
-
-    if (size_value < -1)
-    {
-      throw std::runtime_error("Invalid size.");
-    }
-    // A begin outside the dimension would otherwise yield a negative or oversized extent.
-    if (begin_value < 0 || begin_value > dim_value)
-    {
-      throw std::runtime_error("Invalid begin and size.");
-    }
-
-    const T remaining = dim_value - begin_value;
-    if (size_value == -1)
-    {
-      size_value = remaining;
-    }
-    else if (size_value > remaining) // compared against the remainder to avoid overflow
-    {
-      throw std::runtime_error("Invalid begin and size.");
-    }
-    output_shape.dim(idx) = static_cast<int>(size_value);
-  }
-  return output_shape;
-}
-
-// Appends the first `dimensions` entries of `begin` and `size` (element type T) to `begins`
-// and `sizes`, walking from the last dimension down to the first.
-template <typename T>
-void getBeginAndSizeVectors(int dimensions, const Tensor *begin, const Tensor *size,
-                            std::vector<int> *begins, std::vector<int> *sizes)
-{
-  int idx = dimensions;
-  while (idx-- > 0)
-  {
-    begins->push_back(getTensorData<T>(begin)[idx]);
-    sizes->push_back(getTensorData<T>(size)[idx]);
-  }
-}
-
-// Validates the operands and resizes the output to the requested slice.
-//
-// Enforced with runtime checks (std::runtime_error on failure) so they also hold in NDEBUG
-// builds:
-//  - input and output share an element type;
-//  - begin and size are rank-1 tensors of the same type, S32 or S64, each holding exactly one
-//    value per input dimension (both are read with begin's element type);
-//  - the input rank does not exceed max_dim.
-void Slice::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(begin()->element_type() == DataType::S32 ||
-                         begin()->element_type() == DataType::S64);
-  LUCI_INTERPRETER_CHECK(size()->element_type() == begin()->element_type());
-  LUCI_INTERPRETER_CHECK(begin()->shape().num_dims() == 1);
-  LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
-
-  const int input_rank = input()->shape().num_dims();
-  LUCI_INTERPRETER_CHECK(input_rank <= max_dim);
-  // One begin/size entry per input dimension is indexed when the output shape is computed.
-  LUCI_INTERPRETER_CHECK(begin()->shape().dim(0) == input_rank);
-  LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == input_rank);
-
-  if (begin()->element_type() == DataType::S32)
-  {
-    output()->resize(calculateOutputShape<int32_t>(input(), begin(), size()));
-  }
-  else if (begin()->element_type() == DataType::S64)
-  {
-    output()->resize(calculateOutputShape<int64_t>(input(), begin(), size()));
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Copies the requested slice of input() into output().
-//
-// begin/size are gathered in reverse dimension order, padded with begin 0 / size 1 up to four
-// entries and then reversed again into tflite::SliceParams, so the padding ends up in the
-// leading positions. FLOAT32, U8 and S8 inputs are handled; other input types and begin
-// types other than S32/S64 raise std::runtime_error.
-void Slice::execute() const
-{
-  std::vector<int> begins;
-  std::vector<int> sizes;
-  begins.reserve(max_dim);
-  sizes.reserve(max_dim);
-
-  const auto index_type = begin()->element_type();
-  if (index_type == DataType::S32)
-  {
-    getBeginAndSizeVectors<int32_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
-  }
-  else if (index_type == DataType::S64)
-  {
-    getBeginAndSizeVectors<int64_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported begin type.");
-  }
-
-  // Pad the missing dimensions.
-  int padded_rank = input()->shape().num_dims();
-  while (padded_rank < max_dim)
-  {
-    begins.push_back(0);
-    sizes.push_back(1);
-    ++padded_rank;
-  }
-
-  assert(begins.size() == 4);
-  assert(sizes.size() == 4);
-
-  tflite::SliceParams slice_params{};
-  slice_params.begin_count = 4;
-  slice_params.size_count = 4;
-  for (int dst = 0, src = 3; dst < 4; ++dst, --src)
-  {
-    slice_params.begin[dst] = begins[src];
-    slice_params.size[dst] = sizes[src];
-  }
-
-  const auto data_type = input()->element_type();
-  if (data_type == DataType::FLOAT32)
-  {
-    luci_interpreter_pal::Slice(slice_params, getTensorShape(input()),
-                                getTensorData<float>(input()), getTensorShape(output()),
-                                getTensorData<float>(output()));
-  }
-  else if (data_type == DataType::U8)
-  {
-    luci_interpreter_pal::Slice(slice_params, getTensorShape(input()),
-                                getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                getTensorData<uint8_t>(output()));
-  }
-  else if (data_type == DataType::S8)
-  {
-    luci_interpreter_pal::Slice(slice_params, getTensorShape(input()),
-                                getTensorData<int8_t>(input()), getTensorShape(output()),
-                                getTensorData<int8_t>(output()));
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported input type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Softmax.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/softmax.h>
-#include "PALSoftmax.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Registers `input` / `output` as the only kernel input / output and stores `params`.
-Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params)
-  : KernelWithParams<SoftmaxParams>({input}, {output}, params)
-{
-}
-
-// Checks that input and output share an element type and that the input has at least one
-// dimension. For U8/S8 inputs it additionally requires the output zero point to be 0 (U8) or
-// the int8_t minimum (S8) and fills the _table lookup buffer from the input scale and beta.
-// The output is finally resized to the input shape.
-void Softmax::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1);
-
-  const bool is_quantized =
-    input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8;
-  if (is_quantized)
-  {
-    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0);
-    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 ||
-                           output()->zero_point() == std::numeric_limits<int8_t>::min());
-
-    tflite::SoftmaxParams table_params{};
-    table_params.table = _table;
-    luci_interpreter_pal::PopulateSoftmaxLookupTable(&table_params, input()->scale(),
-                                                     params().beta);
-  }
-
-  output()->resize(input()->shape());
-}
-
-// Dispatches on the input element type: FLOAT32 -> evalFloat(), S8 / U8 -> evalQuantized<T>().
-// Any other type raises std::runtime_error.
-void Softmax::execute() const
-{
-  const auto data_type = input()->element_type();
-  if (data_type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (data_type == DataType::S8)
-  {
-    evalQuantized<int8_t>();
-  }
-  else if (data_type == DataType::U8)
-  {
-    evalQuantized<uint8_t>();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float softmax through the TFLite reference kernel; beta comes from the kernel parameters.
-void Softmax::evalFloat() const
-{
-  tflite::SoftmaxParams softmax_params{};
-  softmax_params.beta = params().beta;
-
-  const auto *src = getTensorData<float>(input());
-  auto *dst = getTensorData<float>(output());
-  tflite::reference_ops::Softmax(softmax_params, getTensorShape(input()), src,
-                                 getTensorShape(output()), dst);
-}
-
-// Quantized softmax for element type T: passes the _table lookup buffer together with the
-// output scale / zero point to the PAL kernel, after InitializeParams has been given the
-// input scale and beta.
-template <typename T> void Softmax::evalQuantized() const
-{
-  tflite::SoftmaxParams softmax_params{};
-  softmax_params.scale = output()->scale();
-  softmax_params.zero_point = output()->zero_point();
-  // execute() is const, so the member table has to be un-const'ed for the params struct.
-  softmax_params.table = const_cast<float *>(_table);
-
-  luci_interpreter_pal::InitializeParams(&softmax_params, input()->scale(), params().beta);
-  luci_interpreter_pal::Softmax(softmax_params, getTensorShape(input()),
-                                getTensorData<T>(input()), getTensorShape(output()),
-                                getTensorData<T>(output()));
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_SOFTMAX_H
-#define LUCI_INTERPRETER_KERNELS_SOFTMAX_H
-
-#include "core/Kernel.h"
-#include "core/KernelParams.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Softmax kernel with one input and one output tensor, parameterised by SoftmaxParams.
-class Softmax : public KernelWithParams<SoftmaxParams>
-{
-public:
-  Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params);
-
-  // Accessors for the single input / output tensor.
-  const Tensor *input() const { return _inputs[0]; }
-  Tensor *output() const { return _outputs[0]; }
-
-  void configure() override;
-  void execute() const override;
-
-private:
-  void evalFloat() const;
-  template <typename T> void evalQuantized() const;
-
-  // Lookup table handed to the quantized softmax implementation through
-  // tflite::SoftmaxParams::table.
-  float _table[256];
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_SOFTMAX_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Softmax.h"
-#include "kernels/TestUtils.h"
-#include "luci_interpreter/TestMemoryManager.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-using namespace testing;
-
-// Maps a C++ element type to its loco::DataType; only the specializations below are defined.
-template <typename T> constexpr loco::DataType toLocoDataType();
-
-template <> constexpr loco::DataType toLocoDataType<float>()
-{
-  return loco::DataType::FLOAT32;
-}
-
-template <> constexpr loco::DataType toLocoDataType<uint8_t>()
-{
-  return loco::DataType::U8;
-}
-
-template <> constexpr loco::DataType toLocoDataType<int8_t>()
-{
-  return loco::DataType::S8;
-}
-
-// Floating-point overload: runs Softmax (beta = 0.1) on `input_data` and expects the output
-// values to be near `output_data` and the output shape to equal `output_shape`.
-template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
-void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
-           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
-{
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
-  constexpr loco::DataType dtype = toLocoDataType<T>();
-  Tensor input_tensor = makeInputTensor<dtype>(input_shape, input_data, memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(dtype);
-
-  SoftmaxParams params{};
-  params.beta = 0.1;
-
-  Softmax kernel(&input_tensor, &output_tensor, params);
-  kernel.configure();
-  // The output buffer can only be allocated once configure() has fixed its shape.
-  memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
-  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
-}
-
-// Integral overload: quantizes input and expected output over ranges that always include 0,
-// runs Softmax (beta = 0.1) and compares the dequantized result with `output_data` using the
-// output scale as tolerance.
-template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
-void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
-           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
-{
-  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
-
-  const float input_min = std::min<float>(std::min<float>(input_data), 0.f);
-  const float input_max = std::max<float>(std::max<float>(input_data), 0.f);
-  std::pair<float, int32_t> input_quant_param = quantizationParams<T>(input_min, input_max);
-
-  const float output_min = std::min<float>(std::min<float>(output_data), 0.f);
-  const float output_max = std::max<float>(std::max<float>(output_data), 0.f);
-  std::pair<float, int32_t> output_quant_param = quantizationParams<T>(output_min, output_max);
-
-  Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(
-    input_shape, input_quant_param.first, input_quant_param.second, input_data,
-    memory_manager.get());
-  Tensor output_tensor =
-    makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second);
-
-  SoftmaxParams params{};
-  params.beta = 0.1;
-
-  Softmax kernel(&input_tensor, &output_tensor, params);
-  kernel.configure();
-  // The output buffer can only be allocated once configure() has fixed its shape.
-  memory_manager->allocate_memory(output_tensor);
-  kernel.execute();
-
-  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
-  EXPECT_THAT(dequantizeTensorData(output_tensor),
-              FloatArrayNear(output_data, output_tensor.scale()));
-}
-
-// Empty typed-test fixture; the element type under test is the template argument.
-template <typename T> class SoftmaxTest : public ::testing::Test
-{
-};
-
-// The suite is instantiated once per listed element type.
-using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
-TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
-
-// Runs Check<TypeParam> on a {2, 1, 2, 3} tensor. Each row of three expected values sums to
-// ~1 and corresponds to the input row on the same line with the beta of 0.1 set in Check.
-TYPED_TEST(SoftmaxTest, Simple)
-{
- Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3},
- {
- 5, -9, 8, //
- -7, 2, -4, //
- 1, -2, 9, //
- 3, -6, -1, //
- },
- {
- 0.38514, 0.09497, 0.51989, //
- 0.20792, 0.51141, 0.28067, //
- 0.25212, 0.18678, 0.56110, //
- 0.48149, 0.19576, 0.32275, //
- });
-}
-
-} // namespace
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/SpaceToBatchND.h"
-#include "kernels/Utils.h"
-
-#include "PALSpaceToBatchND.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace
-{
-
-// Inclusive bounds on the input rank accepted by SpaceToBatchND::configure().
-constexpr int kInputMinDimensionNum = 3;
-constexpr int kInputMaxDimensionNum = 4;
-
-} // namespace
-
-// Registers `input`, `block_shape` and `paddings` as the kernel inputs and `output` as its
-// only output.
-SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape,
-                               const Tensor *paddings, Tensor *output)
-  : Kernel({input, block_shape, paddings}, {output})
-{
-  // Nothing else to initialise.
-}
-
-// Validates the operands and computes the output shape.
-//
-// Requirements (each failure raises through LUCI_INTERPRETER_CHECK):
-//  - input rank is within [kInputMinDimensionNum, kInputMaxDimensionNum] and the output has
-//    the input's element type;
-//  - block_shape is an S32 tensor of shape [spatial dims] with strictly positive entries;
-//  - paddings is an S32 tensor of shape [spatial dims, 2];
-//  - every padded spatial dimension is divisible by its block size.
-// Output: batch = input batch * product(block sizes), spatial dims = padded size / block size,
-// last dimension copied from the input.
-void SpaceToBatchND::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
-  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  // Both auxiliary tensors are read as int32_t below.
-  LUCI_INTERPRETER_CHECK(block_shape()->element_type() == DataType::S32);
-  LUCI_INTERPRETER_CHECK(paddings()->element_type() == DataType::S32);
-
-  const int num_dims = input()->shape().num_dims();
-  const int spatial_dims_num = num_dims - 2;
-
-  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
-  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
-
-  LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2);
-  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num);
-  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2);
-
-  // Fetch the data only after the shapes and types above have been validated.
-  const auto *block_shape_data = block_shape()->data<int32_t>();
-  const auto *paddings_data = paddings()->data<int32_t>();
-
-  Shape output_shape = Shape(num_dims);
-  int output_batch_size = input()->shape().dim(0);
-  for (int i = 0; i < spatial_dims_num; ++i)
-  {
-    const int block = block_shape_data[i];
-    // A zero block would divide by zero below; a negative one would yield negative sizes.
-    LUCI_INTERPRETER_CHECK(block > 0);
-
-    const int final_dim_size =
-      (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]);
-    LUCI_INTERPRETER_CHECK(final_dim_size % block == 0);
-    output_shape.dim(i + 1) = final_dim_size / block;
-    output_batch_size = output_batch_size * block;
-  }
-  output_shape.dim(0) = output_batch_size;
-  output_shape.dim(num_dims - 1) = input()->shape().dim(num_dims - 1);
-  output()->resize(output_shape);
-}
-
-// Runs the PAL SpaceToBatchND kernel for FLOAT32 and U8 inputs; other element types raise
-// std::runtime_error. output_offset is 0 for float data and the output zero point for U8.
-void SpaceToBatchND::execute() const
-{
-  // Declared (and value-initialised) ahead of the switch: a declaration placed inside the
-  // switch body before the first case label is skipped by the jump to that label.
-  tflite::SpaceToBatchParams op_params{};
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      op_params.output_offset = 0;
-      luci_interpreter_pal::SpaceToBatchND(
-        op_params, getTensorShape(input()), getTensorData<float>(input()),
-        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
-        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
-        getTensorData<float>(output()));
-      break;
-    case DataType::U8:
-      op_params.output_offset = output()->zero_point();
-      luci_interpreter_pal::SpaceToBatchND(
-        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
-        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
-        getTensorData<uint8_t>(output()));
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SpaceToDepth.h"
-#include "Utils.h"
-#include "PALSpaceToDepth.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Registers `input` / `output` as the only kernel input / output and stores `params`.
-SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params)
-  : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
-{
-}
-
-// Checks the input/output tensors (debug builds only, via assert) and resizes the
-// output: height and width are divided by the block size, the last dimension is
-// multiplied by block_size * block_size, and dimension 0 is copied unchanged.
-void SpaceToDepth::configure()
-{
-  const Shape &in_shape = input()->shape();
-
-  assert(input()->shape().num_dims() == 4);
-  assert(output()->element_type() == DataType::FLOAT32 ||
-         output()->element_type() == DataType::U8 || output()->element_type() == DataType::S8 ||
-         output()->element_type() == DataType::S32 || output()->element_type() == DataType::S64);
-  assert(input()->element_type() == output()->element_type());
-
-  const int block = params().block_size;
-  const int32_t in_h = in_shape.dim(1);
-  const int32_t in_w = in_shape.dim(2);
-  const int32_t out_h = in_h / block;
-  const int32_t out_w = in_w / block;
-
-  // Both spatial extents must be exact multiples of the block size.
-  assert(in_h == out_h * block);
-  assert(in_w == out_w * block);
-
-  Shape result(4);
-  result.dim(0) = in_shape.dim(0);
-  result.dim(1) = out_h;
-  result.dim(2) = out_w;
-  result.dim(3) = in_shape.dim(3) * block * block;
-
-  output()->resize(result);
-}
-
-// Dispatches to the SpaceToDepth PAL kernel. Only FLOAT32 and U8 inputs are handled
-// here; any other element type throws std::runtime_error.
-void SpaceToDepth::execute() const
-{
-  tflite::SpaceToDepthParams kernel_params{};
-  kernel_params.block_size = params().block_size;
-
-  const DataType type = input()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    luci_interpreter_pal::SpaceToDepth(kernel_params, getTensorShape(input()),
-                                       getTensorData<float>(input()), getTensorShape(output()),
-                                       getTensorData<float>(output()));
-  }
-  else if (type == DataType::U8)
-  {
-    luci_interpreter_pal::SpaceToDepth(kernel_params, getTensorShape(input()),
-                                       getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                       getTensorData<uint8_t>(output()));
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Split.h"
-
-#include "Utils.h"
-
-#include "PALSplit.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Forwards {axis, input} as the input list and takes ownership of the `outputs`
-// vector by moving it into the Kernel base.
-Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs)
-  : Kernel({axis, input}, std::move(outputs)) {}
-
-// Reads the split axis (negative values count from the last dimension), stores it in
-// _axis_value, and resizes every output to the input shape with the split axis
-// divided evenly by the number of outputs. Preconditions are assert-only.
-void Split::configure()
-{
-  assert(axis()->shape().num_elements() == 1);
-
-  const Shape &in_shape = input()->shape();
-
-  _axis_value = getTensorData<int32_t>(axis())[0];
-  if (_axis_value < 0)
-  {
-    _axis_value += in_shape.num_dims();
-  }
-  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
-
-  const int32_t axis_extent = in_shape.dim(_axis_value);
-  assert(axis_extent % _outputs.size() == 0);
-  const int32_t per_output = axis_extent / _outputs.size();
-
-  Shape slice_shape = in_shape;
-  slice_shape.dim(_axis_value) = per_output;
-
-  for (size_t i = 0; i < _outputs.size(); ++i)
-  {
-    _outputs[i]->resize(slice_shape);
-  }
-}
-
-// Splits the input along _axis_value into _outputs.size() pieces using the PAL Split
-// kernel. FLOAT32 and U8 are handled; other element types throw std::runtime_error.
-void Split::execute() const
-{
-  tflite::SplitParams split_params{};
-  split_params.num_split = _outputs.size();
-  split_params.axis = _axis_value;
-
-// Expands to one complete `case`: gather the outputs as `ctype` buffers, run the kernel.
-#define LUCI_SPLIT_CASE(dtype, ctype)                                             \
-  case dtype:                                                                     \
-  {                                                                               \
-    VectorOfTensors<ctype, false> gathered(_outputs);                             \
-    luci_interpreter_pal::Split(split_params, getTensorShape(input()),            \
-                                getTensorData<ctype>(input()), gathered.shapes(), \
-                                gathered.data());                                 \
-    break;                                                                        \
-  }
-
-  switch (input()->element_type())
-  {
-    LUCI_SPLIT_CASE(DataType::FLOAT32, float)
-    LUCI_SPLIT_CASE(DataType::U8, uint8_t)
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-#undef LUCI_SPLIT_CASE
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SplitV.h"
-
-#include "Utils.h"
-
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Forwards {input, size_splits, axis} as the input list and moves the `outputs`
-// vector into the Kernel base.
-SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
-               std::vector<Tensor *> outputs)
-  : Kernel({input, size_splits, axis}, std::move(outputs)) {}
-
-// Reads the split axis (negative values count from the last dimension) and the
-// per-output sizes, then resizes every output. At most one entry of size_splits may
-// be -1; that output receives whatever remains of the axis after all explicit sizes
-// are subtracted. Preconditions are assert-only.
-void SplitV::configure()
-{
-  assert(axis()->shape().num_elements() == 1);
-  _axis_value = getTensorData<int32_t>(axis())[0];
-  if (_axis_value < 0)
-    _axis_value += input()->shape().num_dims();
-  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
-
-  auto num_split = static_cast<int32_t>(_outputs.size());
-
-  // Validate the layout of size_splits before its data is walked.
-  assert(size_splits()->shape().num_dims() == 1);
-  assert(size_splits()->shape().num_elements() == num_split);
-
-  auto sizes_data = getTensorData<int32_t>(size_splits());
-  const auto num_size_splits = size_splits()->shape().dim(0);
-
-  // Sum every explicit size, including the last entry. Stopping one element early
-  // would leave the last size out of the total, so a -1 placed anywhere but the end
-  // would be resolved to too large a value.
-  int32_t sum = 0;
-  int32_t count_neg_dim = 0;
-  for (int32_t i = 0; i < num_size_splits; ++i)
-  {
-    if (sizes_data[i] != -1)
-    {
-      sum += sizes_data[i];
-    }
-    else
-    {
-      count_neg_dim++;
-    }
-  }
-  assert(count_neg_dim < 2);
-
-  auto output_shape = input()->shape();
-  for (int32_t i = 0; i < num_split; ++i)
-  {
-    if (sizes_data[i] == -1)
-    {
-      // Inferred size: the part of the axis not claimed by the explicit sizes.
-      output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum;
-    }
-    else
-    {
-      output_shape.dim(_axis_value) = sizes_data[i];
-    }
-    _outputs[i]->resize(output_shape);
-  }
-}
-
-// Splits the input along _axis_value into the already-sized outputs using
-// tflite::optimized_ops::Split. FLOAT32, U8 and S16 are handled; other element types
-// throw std::runtime_error.
-void SplitV::execute() const
-{
-  tflite::SplitParams split_params{};
-  split_params.num_split = _outputs.size();
-  split_params.axis = _axis_value;
-
-// Expands to one complete `case`: gather the outputs as `ctype` buffers, run the kernel.
-#define LUCI_SPLITV_CASE(dtype, ctype)                                             \
-  case dtype:                                                                      \
-  {                                                                                \
-    VectorOfTensors<ctype, false> gathered(_outputs);                              \
-    tflite::optimized_ops::Split(split_params, getTensorShape(input()),            \
-                                 getTensorData<ctype>(input()), gathered.shapes(), \
-                                 gathered.data());                                 \
-    break;                                                                         \
-  }
-
-  switch (input()->element_type())
-  {
-    LUCI_SPLITV_CASE(DataType::FLOAT32, float)
-    LUCI_SPLITV_CASE(DataType::U8, uint8_t)
-    LUCI_SPLITV_CASE(DataType::S16, int16_t)
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-#undef LUCI_SPLITV_CASE
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Sqrt.h"
-#include "kernels/Utils.h"
-
-#include <stdexcept>
-#include <cmath>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Forwards the single input and single output tensor to the Kernel base.
-Sqrt::Sqrt(const Tensor *input, Tensor *output)
-  : Kernel({input}, {output})
-{
-}
-
-// Requires input and output to share an element type (throws std::runtime_error
-// otherwise) and gives the output the same shape as the input.
-void Sqrt::configure()
-{
-  const bool same_type = input()->element_type() == output()->element_type();
-  if (!same_type)
-  {
-    throw std::runtime_error("Input/output tensor data type mismatch.");
-  }
-
-  output()->resize(input()->shape());
-}
-
-// Evaluates the kernel. Only FLOAT32 is supported; any other element type throws
-// std::runtime_error.
-void Sqrt::execute() const
-{
-  if (input()->element_type() == DataType::FLOAT32)
-  {
-    evalFloat();
-    return;
-  }
-
-  throw std::runtime_error("Unsupported type.");
-}
-
-// Writes std::sqrt of each input element to the matching output position. The
-// element count is the flat size of the input shape.
-void Sqrt::evalFloat() const
-{
-  const float *src = getTensorData<float>(input());
-  float *dst = getTensorData<float>(output());
-  const auto count = getTensorShape(input()).FlatSize();
-
-  for (auto idx = count - count; idx < count; ++idx)
-  {
-    dst[idx] = std::sqrt(src[idx]);
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Square.h"
-#include "kernels/Utils.h"
-
-#include <stdexcept>
-#include <cmath>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Forwards the single input and single output tensor to the Kernel base.
-Square::Square(const Tensor *input, Tensor *output)
-  : Kernel({input}, {output})
-{
-}
-
-// Requires input and output to share an element type (throws std::runtime_error
-// otherwise) and gives the output the same shape as the input.
-void Square::configure()
-{
-  const bool same_type = input()->element_type() == output()->element_type();
-  if (!same_type)
-  {
-    throw std::runtime_error("Input/output tensor data type mismatch.");
-  }
-
-  output()->resize(input()->shape());
-}
-
-// Evaluates the kernel. Only FLOAT32 is supported; any other element type throws
-// std::runtime_error.
-void Square::execute() const
-{
-  if (input()->element_type() == DataType::FLOAT32)
-  {
-    evalFloat();
-    return;
-  }
-
-  throw std::runtime_error("Unsupported type.");
-}
-
-// Writes x * x for each input element x to the matching output position. The element
-// count is the flat size of the input shape.
-void Square::evalFloat() const
-{
-  const float *src = getTensorData<float>(input());
-  float *dst = getTensorData<float>(output());
-  const auto count = getTensorShape(input()).FlatSize();
-
-  for (auto idx = count - count; idx < count; ++idx)
-  {
-    dst[idx] = src[idx] * src[idx];
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/SquaredDifference.h"
-
-#include "kernels/Utils.h"
-
-#include "kernels/BinaryOpCommon.h"
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Forwards the two operand tensors and the single output tensor to the Kernel base.
-SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output)
-  : Kernel({input1, input2}, {output}) {}
-
-// Checks that both inputs and the output share one element type, then sizes the
-// output to the broadcast of the two input shapes.
-void SquaredDifference::configure()
-{
-  // Check expressions are kept verbatim in case the macro reports them as text.
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
-  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
-
-  const auto broadcast_shape = calculateShapeForBroadcast(input1()->shape(), input2()->shape());
-  output()->resize(broadcast_shape);
-}
-
-// Evaluates the kernel. Only FLOAT32 is supported; any other element type throws
-// std::runtime_error.
-void SquaredDifference::execute() const
-{
-  if (input1()->element_type() == DataType::FLOAT32)
-  {
-    evalSquaredDifference<float>();
-    return;
-  }
-
-  throw std::runtime_error("Unsupported type.");
-}
-
-// Applies (x - y)^2 element-wise over the two inputs through BinaryOpBroadcastSlow,
-// writing the result to the output tensor.
-template <typename T> inline void SquaredDifference::evalSquaredDifference() const
-{
-  const auto squared_diff = [](T x, T y) {
-    const T delta = x - y;
-    return delta * delta;
-  };
-
-  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
-                        getTensorShape(input2()), getTensorData<T>(input2()),
-                        getTensorShape(output()), getTensorData<T>(output()), squared_diff);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Squeeze.h"
-
-#include "kernels/Utils.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Forwards the single input tensor, the single output tensor and the Squeeze
-// parameters (the list of dimensions to drop) to the KernelWithParams base.
-Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params)
-  : KernelWithParams<SqueezeParams>({input}, {output}, params)
-{
-}
-
-// Computes the output shape. With an empty squeeze_dims list every dimension of
-// extent 1 is dropped; otherwise exactly the listed dimensions are dropped (negative
-// indices count from the last dimension, duplicates are counted once). The rank
-// limit of 8 and the validity of listed dimensions are assert-only.
-void Squeeze::configure()
-{
-  const int rank = input()->shape().num_dims();
-  const int requested = params().squeeze_dims.size();
-  assert(rank <= 8);
-
-  bool drop_axis[8] = {false};
-  int dropped = 0;
-
-  if (requested != 0)
-  {
-    for (int k = 0; k < requested; ++k)
-    {
-      int axis = params().squeeze_dims[k];
-      if (axis < 0)
-      {
-        axis += rank;
-      }
-      assert(axis >= 0 && axis < rank && input()->shape().dim(axis) == 1);
-      if (!drop_axis[axis])
-        ++dropped;
-      drop_axis[axis] = true;
-    }
-  }
-  else
-  {
-    for (int axis = 0; axis < rank; ++axis)
-    {
-      if (input()->shape().dim(axis) != 1)
-        continue;
-      drop_axis[axis] = true;
-      ++dropped;
-    }
-  }
-
-  // Copy the surviving extents in their original order.
-  Shape squeezed(rank - dropped);
-  int out_axis = 0;
-  for (int axis = 0; axis < rank; ++axis)
-  {
-    if (drop_axis[axis])
-      continue;
-    squeezed.dim(out_axis) = input()->shape().dim(axis);
-    ++out_axis;
-  }
-  output()->resize(squeezed);
-}
-
-// Copies the input buffer to the output buffer byte-for-byte; the byte count is the
-// element size of the input type times the number of input elements.
-void Squeeze::execute() const
-{
-  assert(input()->shape().num_elements() == output()->shape().num_elements());
-
-  const auto byte_count =
-    getDataTypeSize(input()->element_type()) * input()->shape().num_elements();
-
-  const auto *src = input()->data<void>();
-  auto *dst = output()->data<void>();
-  std::memcpy(dst, src, byte_count);
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/StridedSlice.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/strided_slice.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Forwards {input, begin, end, strides} as the input list, the single output tensor
-// and the StridedSlice parameters (the masks) to the KernelWithParams base.
-StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end,
-                           const Tensor *strides, Tensor *output, const StridedSliceParams &params)
-  : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
-{
-}
-
-// Validates the operands and computes the output shape.
-//
-// begin/end/strides must be 1-D S32 tensors and the input may have at most 4
-// dimensions (assert-only). ellipsis_mask and new_axis_mask are not supported and
-// throw std::runtime_error, as does a zero stride. Axes selected by
-// shrink_axis_mask are removed from the output shape.
-void StridedSlice::configure()
-{
-  assert(begin()->shape().num_dims() == 1);
-  assert(end()->shape().num_dims() == 1);
-  assert(strides()->shape().num_dims() == 1);
-  assert(input()->element_type() == output()->element_type());
-  assert(begin()->element_type() == DataType::S32);
-  assert(end()->element_type() == DataType::S32);
-  assert(strides()->element_type() == DataType::S32);
-  assert(input()->shape().num_dims() <= 4);
-  if (params().ellipsis_mask != 0)
-  {
-    throw std::runtime_error("ellipsis_mask is not implemented yet.");
-  }
-  if (params().new_axis_mask != 0)
-  {
-    throw std::runtime_error("new_axis_mask is not implemented yet.");
-  }
-  if (input()->element_type() == DataType::U8)
-  {
-    assert(input()->scale() == output()->scale());
-    assert(input()->zero_point() == output()->zero_point());
-  }
-
-  const int num_dims = input()->shape().num_dims();
-
-  tflite::StridedSliceParams op_params{};
-  op_params.start_indices_count = num_dims;
-  op_params.stop_indices_count = num_dims;
-  op_params.strides_count = num_dims;
-
-  for (int i = 0; i < num_dims; i++)
-  {
-    op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
-    op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
-    op_params.strides[i] = getTensorData<int32_t>(strides())[i];
-  }
-  op_params.begin_mask = params().begin_mask;
-  op_params.ellipsis_mask = 0;
-  op_params.end_mask = params().end_mask;
-  op_params.new_axis_mask = 0;
-  op_params.shrink_axis_mask = params().shrink_axis_mask;
-
-  // Extents are collected from the last axis to the first and reversed below.
-  std::vector<int32_t> output_shape_vector;
-  for (int i = 0; i < num_dims; i++)
-  {
-    const int idx = num_dims - i - 1;
-    const int32_t stride = getTensorData<int32_t>(strides())[idx];
-    // Checked at run time: a zero stride would otherwise divide by zero below.
-    if (stride == 0)
-    {
-      throw std::runtime_error("stride value has to be non-zero.");
-    }
-    const int32_t start =
-      ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx);
-    int32_t stop =
-      ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, start);
-
-    const bool shrink_axis = params().shrink_axis_mask & (1 << idx);
-    if (shrink_axis)
-    {
-      stop = start + 1;
-    }
-
-    // dim_shape = max(0, ceil((stop - start) / stride)), computed in integers so the
-    // result stays exact for extents a float cannot represent.
-    const int64_t span = static_cast<int64_t>(stop) - static_cast<int64_t>(start);
-    int32_t dim_shape = 0;
-    if (stride > 0 && span > 0)
-    {
-      dim_shape = static_cast<int32_t>((span + stride - 1) / stride);
-    }
-    else if (stride < 0 && span < 0)
-    {
-      dim_shape = static_cast<int32_t>((span + stride + 1) / stride);
-    }
-
-    if (!shrink_axis)
-    {
-      output_shape_vector.push_back(dim_shape);
-    }
-  }
-  Shape output_shape = Shape(output_shape_vector.size());
-  for (size_t i = 0; i < output_shape_vector.size(); i++)
-  {
-    output_shape.dim(i) = output_shape_vector[output_shape_vector.size() - i - 1];
-  }
-  output()->resize(output_shape);
-}
-
-// Rebuilds the tflite parameter struct from the begin/end/strides tensors and the
-// kernel masks, then runs the reference StridedSlice for the input element type.
-// FLOAT32, U8 and S32 are handled; other element types throw std::runtime_error.
-void StridedSlice::execute() const
-{
-  const int rank = input()->shape().num_dims();
-
-  tflite::StridedSliceParams slice_params{};
-  slice_params.start_indices_count = rank;
-  slice_params.stop_indices_count = rank;
-  slice_params.strides_count = rank;
-
-  for (int axis = 0; axis < rank; ++axis)
-  {
-    slice_params.start_indices[axis] = getTensorData<int32_t>(begin())[axis];
-    slice_params.stop_indices[axis] = getTensorData<int32_t>(end())[axis];
-    slice_params.strides[axis] = getTensorData<int32_t>(strides())[axis];
-  }
-
-  // Ellipsis and new-axis masks are always cleared here.
-  slice_params.ellipsis_mask = 0;
-  slice_params.new_axis_mask = 0;
-  slice_params.begin_mask = params().begin_mask;
-  slice_params.end_mask = params().end_mask;
-  slice_params.shrink_axis_mask = params().shrink_axis_mask;
-
-  const DataType type = input()->element_type();
-  if (type == DataType::FLOAT32)
-  {
-    tflite::reference_ops::StridedSlice(slice_params, getTensorShape(input()),
-                                        getTensorData<float>(input()), getTensorShape(output()),
-                                        getTensorData<float>(output()));
-  }
-  else if (type == DataType::U8)
-  {
-    tflite::reference_ops::StridedSlice(slice_params, getTensorShape(input()),
-                                        getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                        getTensorData<uint8_t>(output()));
-  }
-  else if (type == DataType::S32)
-  {
-    tflite::reference_ops::StridedSlice(slice_params, getTensorShape(input()),
-                                        getTensorData<int32_t>(input()), getTensorShape(output()),
-                                        getTensorData<int32_t>(output()));
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Sub.h"
-#include "kernels/Utils.h"
-
-#include "PALSub.h"
-
-#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Forwards the two operand tensors, the single output tensor and the Sub parameters
-// (fused activation) to the KernelWithParams base.
-Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params)
-  : KernelWithParams<SubParams>({input1, input2}, {output}, params)
-{
-}
-
-// Checks that both inputs and the output share one element type, then sizes the
-// output to the broadcast of the two input shapes.
-void Sub::configure()
-{
-  // Check expressions are kept verbatim in case the macro reports them as text.
-  LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()));
-  LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type()));
-
-  const auto broadcast_shape = calculateShapeForBroadcast(input1()->shape(), input2()->shape());
-  output()->resize(broadcast_shape);
-}
-
-// Picks the evaluation routine from the element type of the first input:
-// FLOAT32 -> evalFloat, S64/S32 -> evalInteger, U8 -> evalQuantized.
-// Any other element type throws std::runtime_error.
-void Sub::execute() const
-{
-  const DataType type = input1()->element_type();
-
-  if (type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (type == DataType::S64)
-  {
-    evalInteger<int64_t>();
-  }
-  else if (type == DataType::S32)
-  {
-    evalInteger<int32_t>();
-  }
-  else if (type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Float subtraction with the fused activation range applied. Uses the reference
-// BroadcastSubSlow when ProcessBroadcastShapes reports that broadcasting is needed,
-// and the PAL Sub kernel otherwise.
-void Sub::evalFloat() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<float>(params, _params.activation);
-
-  // ProcessBroadcastShapes takes the parameter struct by pointer.
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastSubSlow(
-      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
-      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
-  }
-  else
-  {
-    luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
-                              getTensorShape(input2()), getTensorData<float>(input2()),
-                              getTensorShape(output()), getTensorData<float>(output()));
-  }
-}
-
-// Integer subtraction for element type T with the fused activation range applied.
-// Uses the reference BroadcastSubSlow when ProcessBroadcastShapes reports that
-// broadcasting is needed, and the reference Sub otherwise.
-template <typename T> void Sub::evalInteger() const
-{
-  tflite::ArithmeticParams params{};
-  fillArithmeticActivationRange<T>(params, _params.activation);
-
-  // ProcessBroadcastShapes takes the parameter struct by pointer.
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastSubSlow(
-      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
-      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
-                               getTensorShape(input2()), getTensorData<T>(input2()),
-                               getTensorShape(output()), getTensorData<T>(output()));
-  }
-}
-
-// U8 quantized subtraction. Derives fixed-point multipliers and shifts from the
-// input/output scales, fills the arithmetic parameters (negated input zero points,
-// output zero point, quantized activation range) and runs the reference kernel,
-// using BroadcastSubSlow when broadcasting is needed.
-void Sub::evalQuantized() const
-{
-  const auto input1_scale = static_cast<double>(input1()->scale());
-  const auto input2_scale = static_cast<double>(input2()->scale());
-  const auto output_scale = static_cast<double>(output()->scale());
-
-  // Both inputs are rescaled relative to twice the larger input scale; the output
-  // multiplier folds in the 2^left_shift applied to the inputs.
-  const int left_shift = 20;
-  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
-  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
-  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
-  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
-
-  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
-  int input1_shift{}, input2_shift{}, output_shift{};
-  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
-  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
-  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
-
-  int32_t activation_min{};
-  int32_t activation_max{};
-  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
-
-  tflite::ArithmeticParams params{};
-  params.left_shift = left_shift;
-  // The kernel expects inputs' zero points to be negated.
-  params.input1_offset = -input1()->zero_point(); // Note the '-'.
-  params.input1_multiplier = input1_multiplier;
-  params.input1_shift = input1_shift;
-  params.input2_offset = -input2()->zero_point(); // Note the '-'.
-  params.input2_multiplier = input2_multiplier;
-  params.input2_shift = input2_shift;
-  params.output_offset = output()->zero_point();
-  params.output_multiplier = output_multiplier;
-  params.output_shift = output_shift;
-  params.quantized_activation_min = activation_min;
-  params.quantized_activation_max = activation_max;
-
-  // ProcessBroadcastShapes takes the parameter struct by pointer.
-  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
-    getTensorShape(input1()), getTensorShape(input2()), &params);
-
-  if (need_broadcast)
-  {
-    tflite::reference_ops::BroadcastSubSlow(
-      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
-      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-  else
-  {
-    tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
-                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
-                               getTensorShape(output()), getTensorData<uint8_t>(output()));
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Tanh.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/tanh.h>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Forwards the single input and single output tensor to the Kernel base.
-Tanh::Tanh(const Tensor *input, Tensor *output)
-  : Kernel({input}, {output})
-{
-}
-
-// Checks that input and output share an element type, builds the lookup table when
-// the input is U8, and gives the output the same shape as the input.
-void Tanh::configure()
-{
-  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
-
-  const bool is_quantized = input()->element_type() == DataType::U8;
-  if (is_quantized)
-  {
-    populateLookupTable();
-  }
-
-  output()->resize(input()->shape());
-}
-
-// Picks the evaluation routine from the input element type: FLOAT32 -> evalFloat,
-// U8 -> evalQuantized. Any other element type throws std::runtime_error.
-void Tanh::execute() const
-{
-  const DataType type = input()->element_type();
-
-  if (type == DataType::FLOAT32)
-  {
-    evalFloat();
-  }
-  else if (type == DataType::U8)
-  {
-    evalQuantized();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// Runs the tflite reference Tanh over the float input, writing to the float output.
-void Tanh::evalFloat() const
-{
-  const float *src = getTensorData<float>(input());
-  float *dst = getTensorData<float>(output());
-
-  tflite::reference_ops::Tanh(getTensorShape(input()), src, getTensorShape(output()), dst);
-}
-
-// Maps every U8 input element through the lookup table into the U8 output. The
-// element count comes from MatchingFlatSize over the input and output shapes.
-void Tanh::evalQuantized() const
-{
-  const int count = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
-  const uint8_t *src = getTensorData<uint8_t>(input());
-  uint8_t *dst = getTensorData<uint8_t>(output());
-
-  int idx = 0;
-  while (idx < count)
-  {
-    dst[idx] = getTableValue(src[idx]);
-    ++idx;
-  }
-}
-
-// Fills the lookup table with one entry per possible uint8_t input value: the value
-// is dequantized with the input scale/zero point, passed through tanh, requantized
-// with the output scale/zero point, and clamped to the uint8_t range.
-void Tanh::populateLookupTable()
-{
-  const double in_scale = static_cast<double>(input()->scale());
-  const int32_t in_zero_point = static_cast<int32_t>(input()->zero_point());
-  const double out_scale = static_cast<double>(output()->scale());
-  const int32_t out_zero_point = static_cast<int32_t>(output()->zero_point());
-
-  // The reciprocal is computed in double and then narrowed to float.
-  const float inv_out_scale = 1 / out_scale;
-
-  const int32_t upper = std::numeric_limits<uint8_t>::max();
-  const int32_t lower = std::numeric_limits<uint8_t>::min();
-
-  for (int32_t code = lower; code <= upper; ++code)
-  {
-    const float real_value = in_scale * (code - in_zero_point);
-    const float activated = std::tanh(real_value);
-    const float scaled = std::round(activated * inv_out_scale);
-
-    int32_t requantized = static_cast<int32_t>(scaled + out_zero_point);
-    if (requantized > upper)
-      requantized = upper;
-    if (requantized < lower)
-      requantized = lower;
-
-    setTableValue(static_cast<uint8_t>(requantized), static_cast<uint8_t>(code));
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/TestUtils.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-namespace testing
-{
-
-using ::testing::FloatNear;
-using ::testing::Matcher;
-
-// Creates a tensor of the given element type, passing empty braces for the two middle
-// constructor arguments and an empty string for the last one.
-Tensor makeOutputTensor(DataType element_type)
-{
-  return Tensor(element_type, {}, {}, "");
-}
-
-// Creates a tensor of the given element type carrying a single scale and a single
-// zero point; the second argument is empty braces and the last an empty string.
-Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point)
-{
-  return Tensor(element_type, {}, {{scale}, {zero_point}}, "");
-}
-
-// Returns the tensor contents converted to float.
-//
-// U8 and S8 tensors are dequantized with the tensor's single scale and zero point.
-// S16 tensors use a zero point of 0 and either one scale for the whole tensor or one
-// scale per slice along quantized_dimension(). Any other element type throws
-// std::runtime_error.
-std::vector<float> dequantizeTensorData(const Tensor &tensor)
-{
-  switch (tensor.element_type())
-  {
-    case DataType::U8:
-    {
-      std::vector<uint8_t> raw = extractTensorData<uint8_t>(tensor);
-      return dequantize(raw.data(), raw.size(), tensor.scale(), tensor.zero_point());
-    }
-    case DataType::S8:
-    {
-      std::vector<int8_t> raw = extractTensorData<int8_t>(tensor);
-      return dequantize(raw.data(), raw.size(), tensor.scale(), tensor.zero_point());
-    }
-    case DataType::S16:
-      break; // handled below
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-
-  // S16 quantization is symmetric, so zero point should be zero.
-  for (auto zp : tensor.zero_points())
-  {
-    (void)zp;
-    assert(zp == 0);
-  }
-
-  std::vector<int16_t> raw = extractTensorData<int16_t>(tensor);
-  if (tensor.scales().size() == 1)
-  {
-    return dequantize(raw.data(), raw.size(), tensor.scale(), 0);
-  }
-
-  // The quantized dimension splits the shape into three parts: the dimensions before
-  // it (outer), the quantized dimension itself (one scale per index), and the
-  // dimensions after it (inner, contiguous data sharing one scale).
-  const Shape shape = tensor.shape();
-  const int32_t quantized_dimension = tensor.quantized_dimension();
-  assert(quantized_dimension < shape.num_dims());
-  int32_t channel_count = shape.dim(quantized_dimension);
-  assert(channel_count == tensor.scales().size());
-
-  size_t outer_count = 1;
-  for (int d = 0; d < quantized_dimension; ++d)
-    outer_count *= shape.dim(d);
-
-  size_t inner_count = 1;
-  for (int d = quantized_dimension + 1; d < shape.num_dims(); ++d)
-    inner_count *= shape.dim(d);
-
-  assert(shape.num_elements() == outer_count * channel_count * inner_count);
-
-  std::vector<float> result;
-  result.reserve(shape.num_elements());
-  for (size_t outer = 0; outer < outer_count; ++outer)
-  {
-    for (int32_t channel = 0; channel < channel_count; ++channel)
-    {
-      // Start of the contiguous inner run for this (outer, channel) pair.
-      size_t run_start = inner_count * (channel_count * outer + channel);
-      float channel_scale = tensor.scales()[channel];
-      std::vector<float> run = dequantize(raw.data() + run_start, inner_count, channel_scale, 0);
-      result.insert(result.end(), run.begin(), run.end());
-    }
-  }
-  return result;
-}
-
-// Builds a matcher for a float vector: one FloatNear(v, max_abs_error) matcher is created per
-// entry of values, and the list is combined with ElementsAreArray.
-Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error)
-{
-  std::vector<Matcher<float>> per_element;
-  per_element.reserve(values.size());
-  for (size_t idx = 0; idx < values.size(); ++idx)
-  {
-    per_element.emplace_back(FloatNear(values[idx], max_abs_error));
-  }
-  return ElementsAreArray(per_element);
-}
-
-// Returns the dimensions of the tensor's shape as a vector, in dimension order.
-std::vector<int32_t> extractTensorShape(const Tensor &tensor)
-{
-  const auto &shape = tensor.shape();
-  const int rank = shape.num_dims();
-
-  std::vector<int32_t> extents;
-  int axis = 0;
-  while (axis < rank)
-  {
-    extents.push_back(shape.dim(axis));
-    ++axis;
-  }
-  return extents;
-}
-
-} // namespace testing
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Transpose.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/transpose.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output) // Constructs the Transpose kernel.
- : Kernel({input, perm}, {output}) // Forwards {input, perm} as the input list and {output} as the output list.
-{
-}
-
-// Checks the operands with debug-only asserts and resizes the output so that
-// output dimension i equals input dimension perm[i].
-void Transpose::configure()
-{
-  const Shape &in_shape = input()->shape();
-  const int rank = in_shape.num_dims();
-  const int32_t *permutation = getTensorData<int32_t>(perm());
-
-  // Transpose op only supports 1D-4D input arrays.
-  assert(input()->shape().num_dims() <= 4);
-  assert(input()->element_type() == output()->element_type());
-
-  // The permutation must be a 1D tensor with one entry per input dimension.
-  assert(perm()->shape().num_dims() == 1);
-  assert(perm()->shape().dim(0) == rank);
-
-  Shape permuted_shape(rank);
-  int axis = 0;
-  while (axis < rank)
-  {
-    const int32_t source_axis = permutation[axis];
-    assert(source_axis < rank && source_axis >= 0);
-    permuted_shape.dim(axis) = in_shape.dim(source_axis);
-    ++axis;
-  }
-
-  output()->resize(permuted_shape);
-}
-
-// Copies the permutation into tflite::TransposeParams and runs the TFLite reference Transpose
-// for FLOAT32 or U8 inputs; any other element type raises std::runtime_error.
-void Transpose::execute() const
-{
-  const int32_t *perm_values = getTensorData<int32_t>(perm());
-  const int32_t perm_count = perm()->shape().dim(0);
-
-  tflite::TransposeParams op_params{};
-  op_params.perm_count = perm_count;
-  for (int idx = 0; idx < perm_count; ++idx)
-  {
-    op_params.perm[idx] = perm_values[idx];
-  }
-
-  const DataType dtype = input()->element_type();
-  if (dtype == DataType::FLOAT32)
-  {
-    tflite::reference_ops::Transpose(op_params, getTensorShape(input()),
-                                     getTensorData<float>(input()), getTensorShape(output()),
-                                     getTensorData<float>(output()));
-  }
-  else if (dtype == DataType::U8)
-  {
-    tflite::reference_ops::Transpose(op_params, getTensorShape(input()),
-                                     getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                     getTensorData<uint8_t>(output()));
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/TransposeConv.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Constructs the kernel: {output_shape, filter, input, bias} are forwarded as the input list,
-// {output, scratch_tensor} as the output list, and params as the kernel parameters.
-TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
-                             const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
-                             const TransposeConvParams &params)
-  : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
-                                          {output, scratch_tensor}, params)
-{
-}
-
-TransposeConv::~TransposeConv() // Out-of-line destructor with an intentionally empty body.
-{
- // Define destructor here, to delete vector of quantized multipliers properly
-}
-
-// Checks operand ranks and types with debug-only asserts, resizes the output to the shape held
-// in the output_shape tensor, derives the paddings, and prepares the scratch tensor: it is
-// sized like the output and the requantization multipliers are computed for U8/S16 inputs,
-// otherwise it is marked non-allocatable.
-void TransposeConv::configure()
-{
-  assert(output_shape()->shape().num_dims() == 1);
-  assert(input()->shape().num_dims() == 4);
-  assert(filter()->shape().num_dims() == 4);
-  assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
-         input()->element_type() == DataType::S16);
-  assert(input()->element_type() == output()->element_type());
-  assert(input()->shape().dim(3) == filter()->shape().dim(3));
-
-  // The requested output shape is the content of the output_shape tensor.
-  const int rank = output_shape()->shape().dim(0);
-  const int32_t *requested_dims = getTensorData<int32_t>(output_shape());
-  Shape result_shape(rank);
-  for (int d = 0; d < rank; ++d)
-  {
-    result_shape.dim(d) = requested_dims[d];
-  }
-  output()->resize(result_shape);
-
-  const int32_t kernel_h = filter()->shape().dim(1);
-  const int32_t kernel_w = filter()->shape().dim(2);
-  const int32_t out_h = result_shape.dim(1);
-  const int32_t out_w = result_shape.dim(2);
-
-  // The forward-convolution output sizes are only needed as inputs to computePadding().
-  const int32_t forward_h =
-    computeOutputSize(params().padding, out_h, kernel_h, params().stride_height, 1);
-  const int32_t forward_w =
-    computeOutputSize(params().padding, out_w, kernel_w, params().stride_width, 1);
-
-  _padding_height = computePadding(params().stride_height, 1, out_h, kernel_h, forward_h);
-  _padding_width = computePadding(params().stride_width, 1, out_w, kernel_w, forward_w);
-
-  auto scratch = getOutputTensors()[1];
-  const bool quantized =
-    input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16;
-  if (!quantized)
-  {
-    scratch->set_allocatable(false);
-    return;
-  }
-
-  scratch->resize(output()->shape());
-  const std::vector<double> effective_scales =
-    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
-  _quant_multipliers = quantizeMultipliers(effective_scales);
-}
-
-// Dispatches to the evaluation routine matching the input element type.
-//
-// FLOAT32 -> evalFloat()
-// U8      -> evalQuantized() when the filter has exactly one scale,
-//            evalQuantizedPerChannel() when it has one scale per filter.dim(0)
-// S16     -> evalQuantizedS16()
-//
-// Throws std::runtime_error for any other element type, and for a U8 filter that carries no
-// quantization scales at all (previously that case returned without writing the output).
-void TransposeConv::execute() const
-{
-  switch (input()->element_type())
-  {
-    case DataType::FLOAT32:
-      evalFloat();
-      break;
-    case DataType::U8:
-      if (filter()->scales().size() == 1)
-      {
-        evalQuantized();
-      }
-      else if (filter()->scales().size() > 1)
-      {
-        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
-        LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
-                               static_cast<size_t>(filter()->shape().dim(0)));
-        evalQuantizedPerChannel();
-      }
-      else
-      {
-        // No scales means no multipliers were prepared; fail loudly instead of silently
-        // leaving the output untouched.
-        throw std::runtime_error("Quantized filter has no scales.");
-      }
-      break;
-    case DataType::S16:
-      evalQuantizedS16();
-      break;
-    default:
-      throw std::runtime_error("Unsupported type.");
-  }
-}
-
-// FLOAT32 path: fills tflite::ConvParams with the stored paddings and the strides from params(),
-// then calls the TFLite reference TransposeConv with an empty im2col shape and null buffer.
-void TransposeConv::evalFloat() const
-{
-  tflite::ConvParams conv_params{};
-  conv_params.padding_type = tflite::PaddingType::kSame;
-  conv_params.padding_values.width = _padding_width;
-  conv_params.padding_values.height = _padding_height;
-  conv_params.stride_width = params().stride_width;
-  conv_params.stride_height = params().stride_height;
-
-  const float *input_values = getTensorData<float>(input());
-  const float *filter_values = getTensorData<float>(filter());
-  const float *bias_values = getTensorData<float>(bias());
-  float *output_values = getTensorData<float>(output());
-
-  tflite::reference_ops::TransposeConv(conv_params,                             //
-                                       getTensorShape(input()), input_values,   //
-                                       getTensorShape(filter()), filter_values, //
-                                       getTensorShape(bias()), bias_values,     //
-                                       getTensorShape(output()), output_values, //
-                                       tflite::RuntimeShape(), nullptr);
-}
-
-// U8 path used when the filter has a single scale: fills tflite::ConvParams (offsets, the first
-// stored multiplier/shift, full uint8 activation range) and calls the TFLite reference
-// TransposeConv, passing the scratch tensor as the int32 accumulation buffer.
-void TransposeConv::evalQuantized() const
-{
-  tflite::ConvParams conv_params{};
-  conv_params.padding_type = tflite::PaddingType::kSame;
-  conv_params.padding_values.height = _padding_height;
-  conv_params.padding_values.width = _padding_width;
-  conv_params.stride_height = params().stride_height;
-  conv_params.stride_width = params().stride_width;
-
-  // The kernel expects input and filter zero points to be negated; the output one is not.
-  conv_params.input_offset = -input()->zero_point();
-  conv_params.weights_offset = -filter()->zero_point();
-  conv_params.output_offset = output()->zero_point();
-
-  conv_params.output_multiplier = _quant_multipliers[0].multiplier;
-  conv_params.output_shift = _quant_multipliers[0].shift;
-
-  conv_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
-  conv_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
-
-  auto accumulators = getOutputTensors()[1];
-
-  tflite::reference_ops::TransposeConv(conv_params,                                              //
-                                       getTensorShape(input()), getTensorData<uint8>(input()),   //
-                                       getTensorShape(filter()), getTensorData<uint8>(filter()), //
-                                       getTensorShape(bias()), getTensorData<int32_t>(bias()),   //
-                                       getTensorShape(output()), getTensorData<uint8>(output()), //
-                                       tflite::RuntimeShape(), nullptr,                          //
-                                       getTensorData<int32_t>(accumulators));
-}
-
-void TransposeConv::evalQuantizedPerChannel() const // U8 path chosen by execute() when the filter has more than one scale.
-{
- const auto *input_data = getTensorData<uint8_t>(input());
- const auto *filter_data = getTensorData<uint8_t>(filter());
- const auto *bias_data = getTensorData<int32_t>(bias()); // nullptr when bias() is nullptr (see getTensorData)
- auto *output_data = getTensorData<uint8_t>(output());
-
- auto scratch_tensor = getOutputTensors()[1]; // second kernel output, used as the accumulation buffer
- auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
-
- const Shape &input_shape = input()->shape();
- const Shape &filter_shape = filter()->shape();
- const Shape &output_shape = output()->shape();
-
- const int32_t batches = input_shape.dim(0);
- const int32_t input_height = input_shape.dim(1);
- const int32_t input_width = input_shape.dim(2);
- const int32_t input_depth = input_shape.dim(3);
- const int32_t output_depth = filter_shape.dim(0); // filter is indexed as (out_c, filter_y, filter_x, in_c) below
- const int32_t filter_height = filter_shape.dim(1);
- const int32_t filter_width = filter_shape.dim(2);
- const int32_t output_height = output_shape.dim(1);
- const int32_t output_width = output_shape.dim(2);
-
- const int32_t stride_height = _params.stride_height;
- const int32_t stride_width = _params.stride_width;
-
- int32_t activation_min{};
- int32_t activation_max{};
- calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
-
- std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t)); // accumulators start at zero
-
- BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers); // a single entry is reused for every channel
- for (int32_t batch = 0; batch < batches; ++batch)
- {
- for (int32_t in_y = 0; in_y < input_height; ++in_y)
- {
- for (int32_t in_x = 0; in_x < input_width; ++in_x)
- {
- for (int32_t in_c = 0; in_c < input_depth; ++in_c)
- {
- const int32_t out_y_origin = in_y * stride_height - _padding_height; // top-left output position this input element scatters to
- const int32_t out_x_origin = in_x * stride_width - _padding_width;
- for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
- {
- for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
- {
- const int32_t out_x = out_x_origin + filter_x;
- const int32_t out_y = out_y_origin + filter_y;
- if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width)) // skip taps that land outside the output
- {
- for (int32_t out_c = 0; out_c < output_depth; ++out_c)
- {
- const uint8_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
- const uint8_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
- scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
- static_cast<int32_t>(input_val - input()->zero_point()) *
- static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]); // filter zero point is looked up per output channel
- }
- }
- }
- }
- }
- }
- }
- for (int32_t out_y = 0; out_y < output_height; ++out_y) // second pass: requantize this batch's accumulators into the output
- {
- for (int32_t out_x = 0; out_x < output_width; ++out_x)
- {
- for (int32_t out_c = 0; out_c < output_depth; ++out_c)
- {
- int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
- if (bias_data)
- {
- acc += bias_data[out_c];
- }
-
- int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
-
- scaled_acc += output()->zero_point();
- scaled_acc = std::max(scaled_acc, activation_min); // clamp to [activation_min, activation_max]
- scaled_acc = std::min(scaled_acc, activation_max);
-
- output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
- }
- }
- }
- }
-}
-
-void TransposeConv::evalQuantizedS16() const // S16 path chosen by execute(); no zero points are applied here.
-{
- const auto *input_data = getTensorData<int16_t>(input());
- const auto *filter_data = getTensorData<int16_t>(filter());
- const auto *bias_data = getTensorData<int64_t>(bias()); // nullptr when bias() is nullptr (see getTensorData)
- auto *output_data = getTensorData<int16_t>(output());
-
- auto scratch_tensor = getOutputTensors()[1]; // second kernel output, used as the accumulation buffer
- auto *scratch_data = getTensorData<int64_t>(scratch_tensor); // accumulators are read and written as int64_t
-
- const Shape &input_shape = input()->shape();
- const Shape &filter_shape = filter()->shape();
- const Shape &output_shape = output()->shape();
-
- const int32_t batches = input_shape.dim(0);
- const int32_t input_height = input_shape.dim(1);
- const int32_t input_width = input_shape.dim(2);
- const int32_t input_depth = input_shape.dim(3);
- const int32_t output_depth = filter_shape.dim(0); // filter is indexed as (out_c, filter_y, filter_x, in_c) below
- const int32_t filter_height = filter_shape.dim(1);
- const int32_t filter_width = filter_shape.dim(2);
- const int32_t output_height = output_shape.dim(1);
- const int32_t output_width = output_shape.dim(2);
-
- const int32_t stride_height = _params.stride_height;
- const int32_t stride_width = _params.stride_width;
-
- int32_t activation_min{};
- int32_t activation_max{};
- calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
-
- std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t)); // accumulators start at zero
-
- BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers); // a single entry is reused for every channel
- for (int32_t batch = 0; batch < batches; ++batch)
- {
- for (int32_t in_y = 0; in_y < input_height; ++in_y)
- {
- for (int32_t in_x = 0; in_x < input_width; ++in_x)
- {
- for (int32_t in_c = 0; in_c < input_depth; ++in_c)
- {
- const int32_t out_y_origin = in_y * stride_height - _padding_height; // top-left output position this input element scatters to
- const int32_t out_x_origin = in_x * stride_width - _padding_width;
- for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
- {
- for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
- {
- const int32_t out_x = out_x_origin + filter_x;
- const int32_t out_y = out_y_origin + filter_y;
- if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width)) // skip taps that land outside the output
- {
- for (int32_t out_c = 0; out_c < output_depth; ++out_c)
- {
- const int16_t input_val =
- input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
- const int16_t filter_val =
- filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
- scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
- static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val); // widened to int64 before multiplying
- }
- }
- }
- }
- }
- }
- }
- for (int32_t out_y = 0; out_y < output_height; ++out_y) // second pass: requantize this batch's accumulators into the output
- {
- for (int32_t out_x = 0; out_x < output_width; ++out_x)
- {
- for (int32_t out_c = 0; out_c < output_depth; ++out_c)
- {
- int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
- if (bias_data)
- {
- acc += bias_data[out_c];
- }
- int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
- acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
-
- scaled_acc = std::max(scaled_acc, activation_min); // clamp to [activation_min, activation_max]
- scaled_acc = std::min(scaled_acc, activation_max);
-
- output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
- }
- }
- }
- }
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Unpack.h"
-
-#include "kernels/Utils.h"
-
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-namespace kernels
-{
-
-// Constructs the kernel: {input} is forwarded as the input list, the given outputs (moved) as
-// the output list, and params as the kernel parameters.
-Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
-  : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
-{
-}
-
-// Resizes every output to the input shape with the unpack axis removed. A negative axis is
-// counted from the end; the axis range and element types are checked with debug-only asserts.
-void Unpack::configure()
-{
-  const Shape &in_shape = input()->shape();
-  const int rank = in_shape.num_dims();
-
-  const int axis = _params.axis < 0 ? _params.axis + rank : _params.axis;
-  assert(axis >= 0 && axis < rank);
-
-  // Copy every input dimension except the one being unpacked.
-  Shape slice_shape(rank - 1);
-  for (int src = 0, dst = 0; src < rank; ++src)
-  {
-    if (src == axis)
-      continue;
-    slice_shape.dim(dst) = in_shape.dim(src);
-    ++dst;
-  }
-
-  for (size_t k = 0; k < _outputs.size(); ++k)
-  {
-    Tensor *out = _outputs[k];
-    assert(out->element_type() == input()->element_type());
-    out->resize(slice_shape);
-  }
-}
-
-// Runs the TFLite reference Unpack for element type T, splitting the input into _outputs.
-// The first output's shape is passed as the output shape.
-template <typename T> void Unpack::executeImpl() const
-{
-  VectorOfTensors<T, false> outputs_view(_outputs);
-
-  tflite::UnpackParams op_params{};
-  op_params.axis = _params.axis;
-  op_params.num_split = _outputs.size();
-
-  const T *input_values = getTensorData<T>(input());
-  tflite::reference_ops::Unpack<T>(op_params, getTensorShape(input()), input_values,
-                                   **outputs_view.shapes(), outputs_view.data());
-}
-
-// Dispatches to executeImpl<T>() for FLOAT32 and U8 inputs; any other element type raises
-// std::runtime_error.
-void Unpack::execute() const
-{
-  const DataType dtype = input()->element_type();
-  if (dtype == DataType::FLOAT32)
-  {
-    executeImpl<float>();
-    return;
-  }
-  if (dtype == DataType::U8)
-  {
-    executeImpl<uint8_t>();
-    return;
-  }
-  throw std::runtime_error("Unsupported type.");
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernels/Utils.h"
-
-#include <cassert>
-#include <cmath>
-#include <limits>
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Writes the [min, max] clamp range for the given activation, expressed in type T:
-//   NONE         -> [lowest(T), max(T)]
-//   RELU         -> [0, max(T)]
-//   RELU_N1_TO_1 -> [-1, 1]
-//   RELU6        -> [0, 6]
-// Any other activation raises std::runtime_error and leaves the outputs untouched.
-template <typename T>
-void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
-{
-  T lower{};
-  T upper{};
-
-  if (activation == Activation::NONE)
-  {
-    lower = std::numeric_limits<T>::lowest();
-    upper = std::numeric_limits<T>::max();
-  }
-  else if (activation == Activation::RELU)
-  {
-    lower = 0;
-    upper = std::numeric_limits<T>::max();
-  }
-  else if (activation == Activation::RELU_N1_TO_1)
-  {
-    lower = -1;
-    upper = 1;
-  }
-  else if (activation == Activation::RELU6)
-  {
-    lower = 0;
-    upper = 6;
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported activation.");
-  }
-
-  *activation_min = lower;
-  *activation_max = upper;
-}
-
-// Explicit instantiations for the element types used by the kernels.
-template void calculateActivationRange(Activation activation, float *activation_min,
-                                       float *activation_max);
-template void calculateActivationRange(Activation activation, int32_t *activation_min,
-                                       int32_t *activation_max);
-template void calculateActivationRange(Activation activation, int64_t *activation_min,
-                                       int64_t *activation_max);
-
-// Computes the clamp range of an activation in the quantized domain of `output`, limited to
-// [qmin, qmax]. Real bounds are mapped with zero_point + round(x / scale). NONE and TANH use the
-// full [qmin, qmax] range; unsupported activations raise std::runtime_error before anything is
-// written.
-static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
-                                                  const Tensor *output, int32_t *activation_min,
-                                                  int32_t *activation_max)
-{
-  const float scale = output->scale();
-  const int32_t zero_point = output->zero_point();
-
-  const auto to_quantized = [scale, zero_point](float real_value) {
-    return zero_point + static_cast<int32_t>(std::round(real_value / scale));
-  };
-
-  int32_t lower = qmin;
-  int32_t upper = qmax;
-  switch (activation)
-  {
-    case Activation::NONE:
-    case Activation::TANH:
-      // Full range, nothing to tighten.
-      break;
-    case Activation::RELU:
-      lower = std::max(qmin, to_quantized(0.0f));
-      break;
-    case Activation::RELU_N1_TO_1:
-      lower = std::max(qmin, to_quantized(-1.0f));
-      upper = std::min(qmax, to_quantized(1.0f));
-      break;
-    case Activation::RELU6:
-      lower = std::max(qmin, to_quantized(0.0f));
-      upper = std::min(qmax, to_quantized(6.0f));
-      break;
-    default:
-      throw std::runtime_error("Unsupported activation.");
-  }
-
-  *activation_min = lower;
-  *activation_max = upper;
-}
-
-// Picks the representable range for the output element type (U8: [0, 255]; S8 and S16: the
-// symmetric range [-max, max]) and delegates to calculateActivationRangeQuantizedImpl.
-// Raises std::runtime_error for any other element type.
-void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
-                                       int32_t *activation_min, int32_t *activation_max)
-{
-  assert(output->zero_points().size() == 1);
-
-  int32_t qmin = 0;
-  int32_t qmax = 0;
-  const DataType dtype = output->element_type();
-  if (dtype == DataType::U8)
-  {
-    qmin = 0;
-    qmax = std::numeric_limits<uint8_t>::max();
-  }
-  else if (dtype == DataType::S8)
-  {
-    qmax = std::numeric_limits<int8_t>::max();
-    qmin = -std::numeric_limits<int8_t>::max();
-  }
-  else if (dtype == DataType::S16)
-  {
-    // For now, assume that signed int16 type implies signed symmetric quantization.
-    assert(output->zero_point() == 0);
-    qmax = std::numeric_limits<int16_t>::max();
-    qmin = -std::numeric_limits<int16_t>::max();
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported type.");
-  }
-
-  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, activation_min,
-                                        activation_max);
-}
-
-// Splits double_multiplier into a Q0.31 fixed-point significand (*quantized_multiplier) and a
-// power-of-two exponent (*shift). A multiplier of exactly 0.0 yields (0, 0).
-void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
-{
-  if (double_multiplier == 0.0)
-  {
-    *quantized_multiplier = 0;
-    *shift = 0;
-    return;
-  }
-
-  int exponent = 0;
-  const double significand = std::frexp(double_multiplier, &exponent);
-  int64_t fixed = static_cast<int64_t>(std::round(significand * (INT64_C(1) << 31)));
-
-  // Rounding can push the significand up to exactly 2^31; renormalize so it fits in int32.
-  if (fixed == (INT64_C(1) << 31))
-  {
-    fixed /= 2;
-    exponent += 1;
-  }
-  assert(fixed <= std::numeric_limits<int32_t>::max());
-
-  // A shift amount smaller than -31 would cause all bits to be shifted out
-  // and thus all results would be zero. We implement that instead with
-  // a zero significand, so as to avoid hitting issues with right-shift
-  // operations with shift amounts greater than 31. Note that this happens
-  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
-  // that we're effectively flushing tiny double_multiplier's to zero.
-  // We could conceivably handle values in the range (roughly) [32, 63]
-  // as 'denormals' i.e. (shift==0, significand < 2^30). In that point of view
-  // the present handling is just doing 'flush denormals to zero'. We could
-  // reconsider and actually generate nonzero denormals if a need arises.
-  const bool flush_to_zero = exponent < -31;
-  *shift = flush_to_zero ? 0 : exponent;
-  *quantized_multiplier = flush_to_zero ? 0 : static_cast<int32_t>(fixed);
-}
-
-// quantizeMultiplier() for multipliers strictly between 0 and 1 (checked with debug-only
-// asserts). The resulting exponent, which is then <= 0, is stored in *left_shift.
-void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
-                                         int *left_shift)
-{
-  assert(double_multiplier < 1.0);
-  assert(double_multiplier > 0.0);
-
-  int exponent = 0;
-  quantizeMultiplier(double_multiplier, quantized_multiplier, &exponent);
-  assert(exponent <= 0);
-
-  *left_shift = exponent;
-}
-
-// Computes the broadcast result shape of two shapes. Dimensions are aligned from the innermost
-// one; a missing dimension counts as 1. Dimensions that differ must have one side equal to 1,
-// otherwise LUCI_INTERPRETER_CHECK throws.
-Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape)
-{
-  const int rank1 = input1_shape.num_dims();
-  const int rank2 = input2_shape.num_dims();
-  const int out_rank = std::max(rank1, rank2);
-  Shape output_shape(out_rank);
-
-  // `back` counts dimensions from the innermost one, starting at 1.
-  for (int back = 1; back <= out_rank; ++back)
-  {
-    const int32_t input1_dim = back <= rank1 ? input1_shape.dim(rank1 - back) : 1;
-    const int32_t input2_dim = back <= rank2 ? input2_shape.dim(rank2 - back) : 1;
-
-    bool need_broadcast = input1_dim != input2_dim;
-    bool can_broadcast = input1_dim == 1 || input2_dim == 1;
-    LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
-
-    output_shape.dim(out_rank - back) = std::max(input1_dim, input2_dim);
-  }
-
-  return output_shape;
-}
-
-} // namespace kernels
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
-#define LUCI_INTERPRETER_KERNELS_UTILS_H
-
-#include "core/KernelParams.h"
-#include "luci_interpreter/core/Tensor.h"
-
-#include <tensorflow/lite/kernels/internal/types.h>
-
-#include <cassert>
-#include <cstdint>
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-namespace kernels
-{
-
-// Throws std::runtime_error with the message "<file>:<line>(<cond>) was not true." when cond
-// evaluates to false.
-// NOTE: this expands to a bare `if` statement that already ends with a semicolon, so do not use
-// it as the unbraced body of an `if` that has an `else`.
-#define LUCI_INTERPRETER_CHECK(cond)                                                        \
-  if (!(cond))                                                                              \
-    throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + "(" + \
-                             std::string(#cond) + ") was not true.");
-
-// Returns half of the total padding, ((out_size - 1) * stride + effective_filter - in_size) / 2,
-// where effective_filter = (filter_size - 1) * dilation_rate + 1. Non-positive results become 0.
-inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
-                              int32_t filter_size, int32_t out_size)
-{
-  const int32_t dilated_filter = (filter_size - 1) * dilation_rate + 1;
-  const int32_t half_excess = ((out_size - 1) * stride + dilated_filter - in_size) / 2;
-  if (half_excess > 0)
-  {
-    return half_excess;
-  }
-  return 0;
-}
-
-// Computes the total padding (out_size - 1) * stride + effective_filter - in_size, clamped at 0.
-// Returns total / 2 and stores the leftover element (total % 2) in *offset.
-inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
-                                        int32_t filter_size, int32_t out_size, int32_t *offset)
-{
-  const int32_t dilated_filter = (filter_size - 1) * dilation_rate + 1;
-  int32_t excess = (out_size - 1) * stride + dilated_filter - in_size;
-  if (excess <= 0)
-  {
-    excess = 0;
-  }
-  *offset = excess % 2;
-  return excess / 2;
-}
-
-// Returns the output extent for the given padding mode:
-//   SAME  -> ceil(image_size / stride), computed as (image_size + stride - 1) / stride
-//   VALID -> (image_size + stride - effective_filter_size) / stride
-// Any other padding value trips a debug assert and yields 0.
-inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
-                                 int32_t stride, int32_t dilation_rate = 1)
-{
-  if (padding == Padding::SAME)
-  {
-    return (image_size + stride - 1) / stride;
-  }
-  if (padding == Padding::VALID)
-  {
-    const int32_t dilated_filter = (filter_size - 1) * dilation_rate + 1;
-    return (image_size + stride - dilated_filter) / stride;
-  }
-  assert(false);
-  return 0;
-}
-
-// Flattens the 4D index (d0, d1, d2, d3) into a linear offset, with d3 varying fastest.
-inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
-{
-  int32_t linear = d0;
-  linear = linear * shape.dim(1) + d1;
-  linear = linear * shape.dim(2) + d2;
-  linear = linear * shape.dim(3) + d3;
-  return linear;
-}
-
-template <typename T>
-void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
-
-void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
- int32_t *activation_min, int32_t *activation_max);
-
-// Base case: the candidate list is exhausted, so T matched none of the types.
-template <typename T> constexpr bool one_of_types() { return false; }
-
-// Returns true when T is the same type as U or as any of the remaining candidates in Other.
-template <typename T, typename U, typename... Other> constexpr bool one_of_types()
-{
-  return std::is_same<T, U>::value ? true : one_of_types<T, Other...>();
-}
-
-/**
- * Fills activation min and max parameters depending on given data type and activation
- *
- * Exactly one pair of fields of p is written, selected by T: the float pair for float, the
- * quantized pair for int32_t, and the int64 pair for int64_t.
- * T is a template parameter, so after optimization this code left with only required if case
- *
- * @tparam T data type of arithmetic operation output tensor (float, int32_t or int64_t)
- * @param p tflite params to fill
- * @param act luci_interpreter::Activation of arithmetic operation
- */
-template <typename T>
-void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
-{
-  static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
-
-  // The branches form one if / else-if / else chain so that float does not also fall into the
-  // int64 branch.
-  if (std::is_same<T, float>::value)
-    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
-  else if (std::is_same<T, int32_t>::value)
-    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
-  else
-    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
-}
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and shift representation of its exponent.
-//
-// Handles an arbitrary positive multiplier. The 'shift' output-value is
-// basically the 'floating-point exponent' of the multiplier:
-// Negative for a right-shift (when the multiplier is <1), positive for a
-// left-shift (when the multiplier is >1)
-void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
-
-// Decompose a double multiplier into a Q0.31 int32 representation of its
-// significand, and a shift representation of its exponent. The exponent is
-// stored un-negated in 'left_shift', so it is always <= 0 (i.e. it denotes a
-// right-shift by -left_shift).
-//
-// Restricted to the case where 0 < multiplier < 1.
-void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
- int *left_shift);
-
-Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape);
-
-// Returns (input_scale * filter_scale) / output_scale as a double. The product is formed in
-// float before widening; a negative product makes LUCI_INTERPRETER_CHECK throw.
-inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
-                                               float output_scale)
-{
-  const double input_product_scale = static_cast<double>(input_scale * filter_scale);
-  LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
-
-  const double divisor = static_cast<double>(output_scale);
-  return input_product_scale / divisor;
-}
-
-// TODO rename getQuantizedConvolutionMultiplers to something more general
-// it is used for non conv operators too
-//
-// Applies getQuantizedConvolutionMultipler() to every entry of filter_scale and returns the
-// results in the same order.
-inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
-                                                             const std::vector<float> &filter_scale,
-                                                             float output_scale)
-{
-  std::vector<double> multipliers;
-  multipliers.reserve(filter_scale.size());
-  for (const float channel_scale : filter_scale)
-  {
-    multipliers.push_back(
-      getQuantizedConvolutionMultipler(input_scale, channel_scale, output_scale));
-  }
-  return multipliers;
-}
-
-struct ChannelQuantMultipliers // One multiplier/shift pair, filled in by quantizeMultipliers().
-{
- int shift; // exponent output of quantizeMultiplier()
- int32_t multiplier; // quantized_multiplier output of quantizeMultiplier()
- ChannelQuantMultipliers() = default;
-};
-
-// Runs quantizeMultiplier() on every entry of effective_scale and returns the resulting
-// multiplier/shift pairs in the same order.
-inline std::vector<ChannelQuantMultipliers>
-quantizeMultipliers(const std::vector<double> &effective_scale)
-{
-  size_t n = effective_scale.size();
-  std::vector<ChannelQuantMultipliers> params(n);
-  for (size_t i = 0; i < n; ++i)
-  {
-    // Each element receives its own significand and exponent through the out-pointers.
-    quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
-  }
-  return params;
-}
-
-// Helper wrapper to hide broadcast logic: a vector with exactly one element answers every index
-// with that element, any other vector is indexed normally. Only a reference to the vector is
-// kept, so the vector must outlive the wrapper.
-template <typename T> class BroadcastableWrapper
-{
-public:
-  BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(0)
-  {
-    if (v.size() != 1)
-    {
-      _stride = 1;
-    }
-  }
-
-  // Returns a copy of the element selected by idx (always element 0 when broadcasting).
-  T operator[](int idx)
-  {
-    const int position = idx * _stride;
-    return _v[position];
-  }
-
-private:
-  const std::vector<T> &_v;
-  int _stride;
-};
-
-// Converts a tensor's Shape into a tflite::RuntimeShape with the same dimensions.
-// A null tensor yields a default-constructed RuntimeShape.
-inline tflite::RuntimeShape getTensorShape(const Tensor *tensor)
-{
-  if (tensor != nullptr)
-  {
-    const Shape &source = tensor->shape();
-    const int rank = source.num_dims();
-    tflite::RuntimeShape converted(rank);
-    int axis = 0;
-    while (axis < source.num_dims())
-    {
-      converted.SetDim(axis, source.dim(axis));
-      ++axis;
-    }
-    return converted;
-  }
-
-  return tflite::RuntimeShape();
-}
-
-// Returns the tensor's data as a read-only T pointer, or nullptr for a null tensor.
-template <typename T> const T *getTensorData(const Tensor *tensor)
-{
-  if (tensor == nullptr)
-  {
-    return nullptr;
-  }
-  return tensor->data<T>();
-}
-
-// Returns the tensor's data as a writable T pointer, or nullptr for a null tensor.
-template <typename T> T *getTensorData(Tensor *tensor)
-{
-  if (tensor == nullptr)
-  {
-    return nullptr;
-  }
-  return tensor->data<T>();
-}
-
-// A list of tensors in a format that can be used by kernels like split and
-// concatenation: parallel arrays of data pointers and shape pointers.
-template <typename T, bool is_const> class VectorOfTensors
-{
-public:
-  using ElementT = typename std::conditional<is_const, const T, T>::type;
-  using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;
-
-  // Collects the data pointer and runtime shape of every tensor in 'tensor_list'.
-  explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
-  {
-    const int tensor_count = tensor_list.size();
-
-    all_data_.reserve(tensor_count);
-    all_shape_.reserve(tensor_count);
-    all_shape_ptr_.reserve(tensor_count);
-
-    for (size_t i = 0; i < tensor_list.size(); ++i)
-    {
-      TensorT *current = tensor_list[i];
-      all_data_.push_back(getTensorData<T>(current));
-      all_shape_.push_back(getTensorShape(current));
-    }
-
-    // Pointers into a std::vector stay valid only while the vector is not modified, so the
-    // addresses are taken in a second pass, after all_shape_ has been fully populated.
-    for (size_t i = 0; i < all_shape_.size(); ++i)
-    {
-      all_shape_ptr_.push_back(&all_shape_[i]);
-    }
-  }
-
-  // Return a pointer to the data pointers of all tensors in the list. For
-  // example:
-  //   float* const* f = v.data();
-  //   f[0][1] is the second element of the first tensor.
-  ElementT *const *data() const { return all_data_.data(); }
-
-  // Return a pointer to the shape pointers of all tensors in the list. For
-  // example:
-  //   const RuntimeShape* const* s = v.shapes();
-  //   s[1] points to the shape of the second tensor in the list.
-  const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }
-
-private:
-  std::vector<ElementT *> all_data_;
-  std::vector<tflite::RuntimeShape> all_shape_;
-  std::vector<tflite::RuntimeShape *> all_shape_ptr_;
-};
-
-// VectorOfTensors for uint8 tensors that additionally exposes the zero point and scale of
-// every tensor, for kernels like split and concatenation.
-template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
-{
-public:
-  using typename VectorOfTensors<uint8_t, is_const>::TensorT;
-
-  // Collects data, shapes and quantization parameters of the tensors in 'tensor_list'.
-  explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
-    : VectorOfTensors<uint8_t, is_const>(tensor_list)
-  {
-    for (auto it = tensor_list.begin(); it != tensor_list.end(); ++it)
-    {
-      TensorT *current = *it;
-      zero_point_.push_back(current->zero_point());
-      scale_.push_back(current->scale());
-    }
-  }
-
-  // Per-tensor scales, in the order of the tensor list.
-  const float *scale() const { return scale_.data(); }
-  // Per-tensor zero points, in the order of the tensor list.
-  const int32_t *zero_point() const { return zero_point_.data(); }
-
-private:
-  std::vector<int32_t> zero_point_;
-  std::vector<float> scale_;
-};
-
-} // namespace kernels
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_KERNELS_UTILS_H
+++ /dev/null
-# Sources that are always compiled into the loader library.
-set(SOURCES
- GraphLoader.h
- GraphLoader.cpp
- KernelBuilderHelper.h
- KernelBuilderHelper.cpp
- KernelBuilder.h
- KernelBuilder.cpp
- ModuleLoader.h
- ModuleLoader.cpp
- RuntimeToIR.h
- nodes/Builders.h)
-
-# include kernel specific builders
-# REGISTER_KERNEL(NODE) appends nodes/<NODE>.cpp to SOURCES. The macro has to be defined before
-# KERNEL_REGISTER_FILE is included (presumably that file consists of REGISTER_KERNEL calls).
-macro(REGISTER_KERNEL NODE)
- list(APPEND SOURCES "nodes/${NODE}.cpp")
-endmacro(REGISTER_KERNEL)
-include(${KERNEL_REGISTER_FILE})
-
-# Static loader library; built as position independent code unless NNCC_LIBRARY_NO_PIC is set.
-add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
-if (NOT NNCC_LIBRARY_NO_PIC)
- set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
-endif(NOT NNCC_LIBRARY_NO_PIC)
-# PUBLIC: the include directories are propagated to targets that link against the loader.
-target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}")
-target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
-
-target_link_libraries(${LUCI_INTERPRETER_LOADER}
- PUBLIC luci_lang ${LUCI_INTERPRETER_CORE}
- PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan)
-
-# Everything below is only processed when tests are enabled.
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-nnas_find_package(GTest REQUIRED)
-
-set(TEST_SOURCES KernelBuilder.test.cpp)
-
-# Unit test executable for the loader, linked against the loader library itself.
-GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES})
-target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER})
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "loader/GraphLoader.h"
-
-#include "loader/KernelBuilder.h"
-
-#include <luci/Plan/CircleNodeExecutionPlan.h>
-#include <loco/IR/Algorithm.h>
-
-namespace luci_interpreter
-{
-namespace
-{
-
-// Builds a Shape with the rank of 'node' and copies every dimension value into it.
-template <typename NodeT> Shape getNodeShape(const NodeT *node)
-{
-  const auto rank = node->rank();
-  Shape result(rank);
-
-  uint32_t axis = 0;
-  while (axis < rank)
-  {
-    result.dim(axis) = node->dim(axis).value();
-    ++axis;
-  }
-  return result;
-}
-
-// Stores the byte size of the constant data of 'node' (element type DT) in '*data_size' and
-// returns a pointer to the first element, or nullptr when the node holds no data.
-template <DataType DT> const void *getNodeDataImpl(const luci::CircleConst *node, size_t *data_size)
-{
-  const int32_t element_count = node->size<DT>();
-  const size_t bytes_per_element = getDataTypeSize(DT);
-
-  *data_size = element_count * bytes_per_element;
-  if (*data_size == 0)
-  {
-    return nullptr;
-  }
-
-  // FIXME There is no good way to get the pointer to the data currently.
-  return &node->at<DT>(0);
-}
-
-// Dispatches on the data type of 'node' to the matching getNodeDataImpl instantiation.
-// Throws std::runtime_error for data types that are not handled here.
-const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
-{
-  const auto dtype = node->dtype();
-
-  if (dtype == DataType::U8)
-    return getNodeDataImpl<DataType::U8>(node, data_size);
-  if (dtype == DataType::FLOAT32)
-    return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
-  if (dtype == DataType::S8)
-    return getNodeDataImpl<DataType::S8>(node, data_size);
-  if (dtype == DataType::S16)
-    return getNodeDataImpl<DataType::S16>(node, data_size);
-  if (dtype == DataType::S32)
-    return getNodeDataImpl<DataType::S32>(node, data_size);
-  if (dtype == DataType::S64)
-    return getNodeDataImpl<DataType::S64>(node, data_size);
-  if (dtype == DataType::BOOL)
-    return getNodeDataImpl<DataType::BOOL>(node, data_size);
-
-  throw std::runtime_error("Unsupported type.");
-}
-
-// Returns the constant data referenced by a "CircleReferencingConst" custom node and stores
-// its size in bytes in '*data_size'.
-//
-// For any other custom code nullptr is returned and '*data_size' is left untouched.
-// Throws std::runtime_error when custom_options is too small to hold the reference record.
-const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
-{
-  if (node->custom_code() != "CircleReferencingConst")
-    return nullptr;
-
-  // helper struct which describes data loaded to custom_options of CircleReferencingConst node
-  // TODO move this struct to header
-  struct ConstDataReference
-  {
-    const uint8_t *data = nullptr;
-    uint32_t size = 0;
-  };
-
-  const auto &custom_options = node->custom_options();
-
-  // Reading the record from a shorter buffer would run past the end of custom_options.
-  if (custom_options.size() < sizeof(ConstDataReference))
-    throw std::runtime_error("Invalid CircleReferencingConst options. " + node->name());
-
-  // Copy the record byte by byte instead of reinterpreting the buffer in place: the options
-  // are a raw byte buffer and give no alignment guarantee for ConstDataReference.
-  ConstDataReference const_data_ref;
-  auto *dst = reinterpret_cast<unsigned char *>(&const_data_ref);
-  const auto *src = reinterpret_cast<const unsigned char *>(custom_options.data());
-  for (size_t i = 0; i < sizeof(ConstDataReference); ++i)
-    dst[i] = src[i];
-
-  *data_size = const_data_ref.size;
-  return const_data_ref.data;
-}
-
-// Tells whether a kernel has to be created for 'node'.
-bool isExecutableNode(const luci::CircleNode *node)
-{
-  const auto opcode = node->opcode();
-
-  // Custom nodes may be executable and non-executable
-  if (opcode == luci::CircleOpcode::CUSTOM)
-  {
-    const auto *custom_node = loco::must_cast<const luci::CircleCustom *>(node);
-
-    // TODO handle more non-executable Custom ops here
-    return custom_node->custom_code() != "CircleReferencingConst";
-  }
-
-  switch (opcode)
-  {
-    // These nodes denote inputs / outputs of a graph.
-    case luci::CircleOpcode::CIRCLECONST:
-    case luci::CircleOpcode::CIRCLEINPUT:
-    case luci::CircleOpcode::CIRCLEOUTPUT:
-    case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
-    // The following nodes denote outputs of multiple-output nodes.
-    case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
-    case luci::CircleOpcode::CIRCLECUSTOMOUT:
-    case luci::CircleOpcode::CIRCLEIFOUT:
-    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
-    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
-    case luci::CircleOpcode::CIRCLESPLITOUT:
-    case luci::CircleOpcode::CIRCLESPLITVOUT:
-    case luci::CircleOpcode::CIRCLETOPKV2OUT:
-    case luci::CircleOpcode::CIRCLEUNIQUEOUT:
-    case luci::CircleOpcode::CIRCLEUNPACKOUT:
-    case luci::CircleOpcode::CIRCLEVARIABLE:
-    case luci::CircleOpcode::CIRCLEWHILEOUT:
-      return false;
-    // Every other node is executed by a kernel.
-    default:
-      return true;
-  }
-}
-
-// Tells whether a tensor has to be created for 'node'.
-bool isTensorProducingNode(const luci::CircleNode *node)
-{
-  bool produces_tensor = true;
-
-  switch (node->opcode())
-  {
-    // Output nodes do not produce tensors.
-    case luci::CircleOpcode::CIRCLEOUTPUT:
-    // Multiple-output nodes do not produce tensors themselves; the tensors are produced by
-    // the corresponding *Out nodes instead.
-    case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
-    case luci::CircleOpcode::CUSTOM:
-    case luci::CircleOpcode::IF:
-    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
-    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
-    case luci::CircleOpcode::SPLIT:
-    case luci::CircleOpcode::SPLIT_V:
-    case luci::CircleOpcode::TOPK_V2:
-    case luci::CircleOpcode::UNIQUE:
-    case luci::CircleOpcode::UNPACK:
-    case luci::CircleOpcode::WHILE:
-      produces_tensor = false;
-      break;
-    default:
-      break;
-  }
-
-  return produces_tensor;
-}
-
-// Tells whether the Custom node 'node' carries a custom code the loader can handle.
-// 'node' must be a luci::CircleCustom (enforced by must_cast).
-bool isSupportedCustomNode(const luci::CircleNode *node)
-{
-  const auto *custom = loco::must_cast<const luci::CircleCustom *>(node);
-
-  // TODO handle more Custom ops here
-  return custom->custom_code() == "CircleReferencingConst";
-}
-
-} // namespace
-
-// Stores the graph to load together with the runtime structures that the load* methods fill.
-// Nothing is loaded by the constructor itself.
-//
-// NOTE The initializers are listed in the declaration order of the members in GraphLoader.h
-// (_memory_manager is declared before the two maps). Members are always initialized in
-// declaration order, so any other listing order only produces -Wreorder warnings.
-GraphLoader::GraphLoader(
-  const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
-  const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-  std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager)
-  : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
-    _memory_manager(memory_manager), _graph_to_runtime_graph(graph_to_runtime_graph),
-    _node_to_tensor(node_to_tensor)
-{
-}
-
-// Creates a Tensor for every tensor-producing node of the graph and registers it in
-// _node_to_tensor, _runtime_to_ir.tensor_to_node and the runtime graph.
-//
-// Constant data (CircleConst nodes and CircleReferencingConst custom nodes) is copied into
-// memory obtained from the memory manager.
-// Throws std::runtime_error when the graph contains an unsupported Custom operator.
-void GraphLoader::loadTensors()
-{
- for (uint32_t i = 0; i < _graph->nodes()->size(); ++i)
- {
- const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
-
- // Reject unsupported Custom nodes before anything is created for them.
- if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
- throw std::runtime_error("Unsupported Custom operator. " + node->name());
-
- if (!isTensorProducingNode(node))
- continue;
-
- // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will
- // be inferred.
- Shape shape{};
- switch (node->opcode())
- {
- case luci::CircleOpcode::CIRCLECONST:
- case luci::CircleOpcode::CIRCLECUSTOMOUT:
- case luci::CircleOpcode::CIRCLEINPUT:
- case luci::CircleOpcode::CIRCLEVARIABLE:
- shape = getNodeShape(node);
- break;
- default:
- break;
- }
-
- // Copy the quantization parameters of the node, if it has any.
- AffineQuantization quantization;
- if (node->quantparam() != nullptr)
- {
- const luci::CircleQuantParam *params = node->quantparam();
- assert(params->scale.size() == params->zerop.size());
- quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
- quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
- quantization.quantized_dimension = params->quantized_dimension;
- }
-
- auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
- node->name());
-
- // If node has execution plan then read memory offsets for nodes
- // from the beginning of shared memory buffer. Used in Static Memory Manager.
- if (luci::has_execution_plan(node))
- {
- auto execution_plan = luci::get_execution_plan(node);
- assert(!execution_plan.offsets().empty());
- tensor->set_offset(execution_plan.offsets().front());
- }
-
- // Constant tensors get their memory and contents right away.
- if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
- {
- size_t data_size{};
- const void *const_data = getNodeData(const_node, &data_size);
- if (const_data != nullptr)
- {
- _memory_manager->allocate_memory(*tensor);
- tensor->writeData(const_data, data_size);
- }
- }
- else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
- {
- // The data of a CircleReferencingConst node is attached to the Custom node that feeds
- // this CustomOut node.
- const auto *custom_node =
- loco::must_cast<const luci::CircleCustom *>(custom_out_node->input());
-
- if (custom_node->custom_code() == "CircleReferencingConst")
- {
- size_t data_size{};
- const void *const_data = getNodeData(custom_node, &data_size);
- if (const_data != nullptr)
- {
- _memory_manager->allocate_memory(*tensor);
- tensor->writeData(const_data, data_size);
- }
- }
- }
-
- // Record the raw pointer in both lookup tables before the unique_ptr is moved away.
- _node_to_tensor.emplace(node, tensor.get());
- _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);
-
- _runtime_graph->addTensor(std::move(tensor));
- }
-}
-
-// Registers the input and output tensors of the graph in the runtime graph.
-// Memory is allocated for every input tensor; output tensors are only looked up.
-void GraphLoader::initInputOutputTensors() const
-{
-  // Inputs: the tensor of each input node, in the order reported by loco::input_nodes.
-  const auto graph_inputs = loco::input_nodes(_graph);
-  std::vector<Tensor *> input_tensors;
-  input_tensors.reserve(graph_inputs.size());
-  for (const auto &input_node : graph_inputs)
-  {
-    Tensor *input_tensor = _node_to_tensor.at(input_node);
-    input_tensors.push_back(input_tensor);
-    _memory_manager->allocate_memory(*input_tensor);
-  }
-  _runtime_graph->setInputTensors(input_tensors);
-
-  // Outputs: the tensor of the node feeding each CircleOutput node.
-  const auto graph_outputs = loco::output_nodes(const_cast<loco::Graph *>(_graph));
-  std::vector<Tensor *> output_tensors;
-  output_tensors.reserve(graph_outputs.size());
-  for (const auto &output_node : graph_outputs)
-  {
-    const auto *circle_output = loco::must_cast<const luci::CircleOutput *>(output_node);
-    output_tensors.push_back(_node_to_tensor.at(circle_output->from()));
-  }
-  _runtime_graph->setOutputTensors(output_tensors);
-}
-
-// Creates the kernels of all executable nodes and adds them to the runtime graph in
-// execution order.
-//
-// When every node of the graph is annotated with an execution plan the order of the plan is
-// used; otherwise the nodes are visited by a postorder traversal starting from the outputs.
-// Throws std::runtime_error when the plan leaves a position without a node (for example
-// because two nodes claim the same position) and std::out_of_range when a position lies
-// outside of the plan.
-void GraphLoader::loadOperators()
-{
-  KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);
-
-  // Builds the kernel of 'node' when it is executable and registers it in the runtime graph.
-  auto const add_kernel = [this, &kernel_builder](const luci::CircleNode *node) {
-    if (!isExecutableNode(node))
-      return;
-
-    std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
-    _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
-    _runtime_graph->addKernel(std::move(kernel));
-  };
-
-  // Create kernels for executable nodes. This has to be done in execution order.
-  auto graph = const_cast<loco::Graph *>(_graph);
-
-  auto const graph_nodes = loco::all_nodes(graph);
-
-  // Checking for execution plan in node annotations.
-  bool has_execution_annotation = true;
-  for (auto const node : graph_nodes)
-  {
-    const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
-    if (!luci::has_execution_plan(circle_node))
-      has_execution_annotation = false;
-  }
-
-  if (has_execution_annotation)
-  {
-    // Build ordered_nodes vector that stores the order of execution of graph nodes.
-    std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size(), nullptr);
-    for (auto const node : graph_nodes)
-    {
-      const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
-      auto const position = luci::get_execution_plan(circle_node).order_in_plan();
-      ordered_nodes.at(position) = circle_node;
-    }
-
-    // A position claimed by two nodes leaves another slot empty. Detect that before any
-    // kernel is created instead of dereferencing a null node later on.
-    for (auto const *node : ordered_nodes)
-    {
-      if (node == nullptr)
-        throw std::runtime_error("Execution plan does not assign a node to every position.");
-    }
-
-    for (auto const *node : ordered_nodes)
-      add_kernel(node);
-  }
-  else
-  {
-    // If it is impossible to build the execution order plan,
-    // then we use the default postorder_traversal approach.
-    for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph)))
-      add_kernel(loco::must_cast<const luci::CircleNode *>(loco_node));
-  }
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
-#define LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
-
-#include "core/RuntimeGraph.h"
-#include "loader/RuntimeToIR.h"
-#include "luci_interpreter/MemoryManager.h"
-
-#include <loco/IR/Graph.h>
-
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-// Fills a RuntimeGraph (tensors, input/output tensors and kernels) from a loco::Graph.
-// All pointers and references passed to the constructor are stored as they are, so the
-// referenced objects have to stay alive while the loader is used.
-class GraphLoader
-{
-public:
- GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
- IMemoryManager *memory_manager);
-
- // Creates the tensors of the graph and records them in node_to_tensor / runtime_to_ir.
- void loadTensors();
- // Registers the input and output tensors of the graph in the runtime graph.
- void initInputOutputTensors() const;
- // Creates the kernels of the executable nodes and adds them to the runtime graph.
- void loadOperators();
-
-private:
- // NOTE Members are initialized in the order they are declared here.
- const loco::Graph *_graph;
- RuntimeGraph *_runtime_graph;
- RuntimeToIR &_runtime_to_ir;
- IMemoryManager *_memory_manager;
-
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
- std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "loader/KernelBuilder.h"
-#include "loader/nodes/Builders.h"
-
-#include <stdexcept>
-
-namespace luci_interpreter
-{
-
-// Both list macros expand every entry of CircleNodes.lst to its CLASS name followed by a
-// comma, i.e. to one enumerator.
-#define CIRCLE_NODE(OPCODE, CLASS) CLASS,
-#define CIRCLE_VNODE(OPCODE, CLASS) CLASS,
-
-// This enum is auxiliary.
-// It is a duplicate of luci::CircleOpcode, but its enumerators are named after CLASS instead
-// of OPCODE, because the list of target operators is given as CLASS names.
-enum class BuilderId
-{
-#include <luci/IR/CircleNodes.lst>
- Size // last enumerator, its value is the number of entries generated from the list
-};
-
-// The helper macros are only needed while the list is included.
-#undef CIRCLE_VNODE
-#undef CIRCLE_NODE
-
-/**
- * @brief Registry of kernel builders
- *
- * Holds a table of kernel builder functions that is indexed by the numeric value of an opcode
- */
-
-class KernelBuilderRegistry
-{
-public:
-  using KernelBuilderFunc = std::unique_ptr<Kernel>(const luci::CircleNode *,
-                                                    KernelBuilderHelper &);
-
-  // Starts with an empty (nullptr) slot per BuilderId value and then fills the slot of every
-  // kernel listed in KernelsToBuild.lst.
-  KernelBuilderRegistry() : _operator_builders(static_cast<size_t>(BuilderId::Size), nullptr)
-  {
-#define REGISTER_KERNEL(name) \
-  register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name);
-
-#include "KernelsToBuild.lst"
-
-#undef REGISTER_KERNEL
-  }
-
-  // Returns the builder registered for 'opcode', or nullptr when none was registered.
-  KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const
-  {
-    const auto slot = static_cast<size_t>(opcode);
-    return _operator_builders.at(slot);
-  }
-
-private:
-  std::vector<KernelBuilderFunc *> _operator_builders;
-
-  void register_kernel_builder(BuilderId id, KernelBuilderFunc *func)
-  {
-    // BuilderId is a duplicate of luci::CircleOpcode, so the numeric value of 'id' equals
-    // the numeric value of the corresponding operation opcode.
-    const auto slot = static_cast<size_t>(id);
-    assert(slot < _operator_builders.size());
-    _operator_builders[slot] = func;
-  }
-};
-
-// Forwards both lookup tables to KernelBuilderHelper and creates the builder registry.
-KernelBuilder::KernelBuilder(
-  const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-  const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
-  : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor),
-    _builder_registry(std::make_unique<KernelBuilderRegistry>())
-{
-}
-
-// Defined in this CPP, where KernelBuilderRegistry is a complete type, so that the header
-// does not have to expose the registry internals. Destroys _builder_registry.
-KernelBuilder::~KernelBuilder() = default;
-
-// Builds the kernel of 'node' with the builder registered for its opcode.
-// Throws std::invalid_argument when no builder is registered for the opcode.
-std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node)
-{
-  const auto opcode = node->opcode();
-  auto *builder_func = _builder_registry->get_kernel_builder_func(opcode);
-
-  if (builder_func == nullptr)
-  {
-    const std::string opcode_text = std::to_string(static_cast<uint32_t>(opcode));
-    const std::string msg = "Unsupported operator: " + opcode_text + " " + std::string(node->name());
-    throw std::invalid_argument(msg.c_str());
-  }
-
-  return builder_func(node, *this);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
-#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
-
-#include "loader/KernelBuilderHelper.h"
-
-#include "core/Kernel.h"
-#include "core/RuntimeGraph.h"
-
-#include <luci/IR/CircleNodeVisitor.h>
-
-#include <memory>
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-class KernelBuilderRegistry;
-
-// Creates the Kernel that executes a given luci::CircleNode.
-// The per-operator builder functions are kept in a KernelBuilderRegistry, which is only
-// forward-declared in this header.
-class KernelBuilder : public KernelBuilderHelper
-{
-public:
- // Both maps are passed on to KernelBuilderHelper.
- KernelBuilder(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
-
- // Declared here and defined out of line because KernelBuilderRegistry is an incomplete
- // type in this header.
- ~KernelBuilder();
-
- // Returns the kernel built for 'node'.
- std::unique_ptr<Kernel> build(const luci::CircleNode *node);
-
-private:
- std::unique_ptr<KernelBuilderRegistry> _builder_registry;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "loader/GraphLoader.h"
-#include "loader/KernelBuilder.h"
-#include "luci_interpreter/SimpleMemoryManager.h"
-
-#include <kernels/Add.h>
-#include <kernels/ArgMax.h>
-#include <kernels/AveragePool2D.h>
-#include <kernels/BatchMatMul.h>
-#include <kernels/Cast.h>
-#include <kernels/Concatenation.h>
-#include <kernels/Conv2D.h>
-#include <kernels/DepthToSpace.h>
-#include <kernels/DepthwiseConv2D.h>
-#include <kernels/Div.h>
-#include <kernels/Elu.h>
-#include <kernels/Exp.h>
-#include <kernels/Floor.h>
-#include <kernels/FloorDiv.h>
-#include <kernels/Equal.h>
-#include <kernels/FullyConnected.h>
-#include <kernels/Greater.h>
-#include <kernels/GreaterEqual.h>
-#include <kernels/InstanceNorm.h>
-#include <kernels/L2Normalize.h>
-#include <kernels/L2Pool2D.h>
-#include <kernels/LeakyRelu.h>
-#include <kernels/Less.h>
-#include <kernels/LessEqual.h>
-#include <kernels/LocalResponseNormalization.h>
-#include <kernels/LogicalAnd.h>
-#include <kernels/LogicalNot.h>
-#include <kernels/LogicalOr.h>
-#include <kernels/Logistic.h>
-#include <kernels/LogSoftmax.h>
-#include <kernels/Maximum.h>
-#include <kernels/MaxPool2D.h>
-#include <kernels/Mean.h>
-#include <kernels/Minimum.h>
-#include <kernels/Mul.h>
-#include <kernels/Neg.h>
-#include <kernels/NotEqual.h>
-#include <kernels/OneHot.h>
-#include <kernels/Pad.h>
-#include <kernels/PadV2.h>
-#include <kernels/Pow.h>
-#include <kernels/PRelu.h>
-#include <kernels/Relu.h>
-#include <kernels/Relu6.h>
-#include <kernels/Reshape.h>
-#include <kernels/ResizeBilinear.h>
-#include <kernels/ResizeNearestNeighbor.h>
-#include <kernels/ReverseV2.h>
-#include <kernels/Rsqrt.h>
-#include <kernels/Slice.h>
-#include <kernels/Softmax.h>
-#include <kernels/SpaceToDepth.h>
-#include <kernels/Split.h>
-#include <kernels/SplitV.h>
-#include <kernels/Sqrt.h>
-#include <kernels/SquaredDifference.h>
-#include <kernels/Squeeze.h>
-#include <kernels/StridedSlice.h>
-#include <kernels/Sub.h>
-#include <kernels/Tanh.h>
-#include <kernels/Transpose.h>
-#include <kernels/TransposeConv.h>
-#include <kernels/Unpack.h>
-
-#include <gmock/gmock.h>
-
-namespace luci_interpreter
-{
-namespace
-{
-
-using namespace testing;
-
-// Fixture for the KernelBuilder tests: owns the graph the test nodes are created in and the
-// node-to-tensor table that GraphLoader::loadTensors() fills.
-class KernelBuilderTest : public Test
-{
-protected:
-  luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); }
-  void SetUp() override { _memory_manager = std::make_unique<SimpleMemoryManager>(); }
-
-  std::unique_ptr<IMemoryManager> _memory_manager;
-
-  // Creates a node of type NodeT in the test graph and gives it the FLOAT32 data type.
-  template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args)
-  {
-    auto *node = _graph.nodes()->create<NodeT>(std::forward<Args>(args)...);
-    // The actual type does not matter for the purpose of the tests.
-    // NOTE The type is meaningless for nodes with multiple outputs (corresponding *Out nodes carry
-    // actual output types).
-    node->dtype(loco::DataType::FLOAT32);
-    return node;
-  }
-
-  // Creates the *Out node that represents output 'index' of the multiple-output node 'node'.
-  template <typename NodeOutT> NodeOutT *createNodeOut(loco::Node *node, int index)
-  {
-    auto *node_out = createNode<NodeOutT>();
-    node_out->input(node);
-    node_out->index(index);
-    return node_out;
-  }
-
-  // Loads the tensors of the test graph and builds the kernel of 'op'.
-  // Returns nullptr when the built kernel is not a KernelT.
-  // NOTE(review): runtime_graph is local to this function, so the tensors registered in it
-  // presumably do not outlive the call - the tests only compare tensor addresses.
-  template <typename KernelT> std::unique_ptr<KernelT> buildKernel(const luci::CircleNode *op)
-  {
-    std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
-
-    RuntimeGraph runtime_graph(nullptr, _memory_manager.get());
-    graph_to_runtime_graph[&_graph] = &runtime_graph;
-    RuntimeToIR runtime_to_ir;
-    GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph,
-                             _node_to_tensor, _memory_manager.get());
-    graph_loader.loadTensors();
-
-    KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor);
-
-    std::unique_ptr<Kernel> kernel = kernel_builder.build(op);
-
-    // Check the dynamic type before giving up ownership: releasing first would leak the
-    // kernel whenever the cast fails.
-    auto *typed_kernel = dynamic_cast<KernelT *>(kernel.get());
-    if (typed_kernel == nullptr)
-      return nullptr; // 'kernel' still owns the object and destroys it here
-
-    kernel.release();
-    return std::unique_ptr<KernelT>(typed_kernel);
-  }
-
-  // Expects 'tensor' to be the tensor that was created for 'node'.
-  void checkTensor(const Tensor *tensor, const loco::Node *node)
-  {
-    EXPECT_THAT(tensor, Eq(_node_to_tensor.at(node)));
-  }
-
-private:
-  loco::Graph _graph;
-  std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
-};
-
-// A CircleAdd node has to be built into kernels::Add with matching tensors and activation.
-TEST_F(KernelBuilderTest, Add)
-{
-  auto *lhs = createInputNode();
-  auto *rhs = createInputNode();
-
-  auto *add_node = createNode<luci::CircleAdd>();
-  add_node->x(lhs);
-  add_node->y(rhs);
-  add_node->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto add_kernel = buildKernel<kernels::Add>(add_node);
-  ASSERT_THAT(add_kernel, NotNull());
-
-  // tensors
-  checkTensor(add_kernel->input1(), lhs);
-  checkTensor(add_kernel->input2(), rhs);
-  checkTensor(add_kernel->output(), add_node);
-  // parameters
-  EXPECT_THAT(add_kernel->params().activation, Eq(add_node->fusedActivationFunction()));
-}
-
-// A CircleArgMax node has to be built into kernels::ArgMax with matching tensors and
-// output type.
-TEST_F(KernelBuilderTest, ArgMax)
-{
-  auto *data_node = createInputNode();
-  auto *axis_node = createInputNode();
-
-  auto *argmax_node = createNode<luci::CircleArgMax>();
-  argmax_node->input(data_node);
-  argmax_node->dimension(axis_node);
-  argmax_node->output_type(loco::DataType::FLOAT32);
-
-  auto argmax_kernel = buildKernel<kernels::ArgMax>(argmax_node);
-  ASSERT_THAT(argmax_kernel, NotNull());
-
-  // tensors
-  checkTensor(argmax_kernel->input(), data_node);
-  checkTensor(argmax_kernel->axis(), axis_node);
-  checkTensor(argmax_kernel->output(), argmax_node);
-  // parameters
-  EXPECT_THAT(argmax_kernel->params().output_type, Eq(argmax_node->output_type()));
-}
-
-// A CircleAveragePool2D node has to be built into kernels::AveragePool2D; every pooling
-// attribute gets a distinct value so that mixed-up parameters are detected.
-TEST_F(KernelBuilderTest, AveragePool2D)
-{
-  auto *data_node = createInputNode();
-
-  auto *pool_node = createNode<luci::CircleAveragePool2D>();
-  pool_node->value(data_node);
-
-  pool_node->padding(luci::Padding::SAME);
-  pool_node->filter()->h(11);
-  pool_node->filter()->w(13);
-  pool_node->stride()->h(17);
-  pool_node->stride()->w(19);
-  pool_node->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto pool_kernel = buildKernel<kernels::AveragePool2D>(pool_node);
-  ASSERT_THAT(pool_kernel, NotNull());
-
-  // tensors
-  checkTensor(pool_kernel->input(), data_node);
-  checkTensor(pool_kernel->output(), pool_node);
-  // parameters
-  EXPECT_THAT(pool_kernel->params().padding, Eq(pool_node->padding()));
-  EXPECT_THAT(pool_kernel->params().filter_height, Eq(pool_node->filter()->h()));
-  EXPECT_THAT(pool_kernel->params().filter_width, Eq(pool_node->filter()->w()));
-  EXPECT_THAT(pool_kernel->params().stride_height, Eq(pool_node->stride()->h()));
-  EXPECT_THAT(pool_kernel->params().stride_width, Eq(pool_node->stride()->w()));
-  EXPECT_THAT(pool_kernel->params().activation, Eq(pool_node->fusedActivationFunction()));
-}
-
-// A CircleBatchMatMul node has to be built into kernels::BatchMatMul with matching tensors
-// and adjoint flags.
-TEST_F(KernelBuilderTest, BatchMatMul)
-{
-  auto *left_node = createInputNode();
-  auto *right_node = createInputNode();
-
-  auto *matmul_node = createNode<luci::CircleBatchMatMul>();
-  matmul_node->x(left_node);
-  matmul_node->y(right_node);
-  matmul_node->adj_x(false);
-  matmul_node->adj_y(false);
-
-  auto matmul_kernel = buildKernel<kernels::BatchMatMul>(matmul_node);
-  ASSERT_THAT(matmul_kernel, NotNull());
-
-  // tensors
-  checkTensor(matmul_kernel->x(), left_node);
-  checkTensor(matmul_kernel->y(), right_node);
-  checkTensor(matmul_kernel->output(), matmul_node);
-  // parameters
-  EXPECT_THAT(matmul_kernel->params().adj_x, Eq(matmul_node->adj_x()));
-  EXPECT_THAT(matmul_kernel->params().adj_y, Eq(matmul_node->adj_y()));
-}
-
-// A CircleCast node has to be built into kernels::Cast with matching tensors.
-TEST_F(KernelBuilderTest, Cast)
-{
-  auto *source_node = createInputNode();
-
-  auto *cast_node = createNode<luci::CircleCast>();
-  cast_node->x(source_node);
-
-  auto cast_kernel = buildKernel<kernels::Cast>(cast_node);
-  ASSERT_THAT(cast_kernel, NotNull());
-
-  checkTensor(cast_kernel->input(), source_node);
-  checkTensor(cast_kernel->output(), cast_node);
-}
-
-// A two-input CircleConcatenation node has to be built into kernels::Concatenation with
-// matching tensors, axis and activation.
-TEST_F(KernelBuilderTest, Concatenation)
-{
-  auto *first_node = createInputNode();
-  auto *second_node = createInputNode();
-
-  auto *concat_node = createNode<luci::CircleConcatenation>(2);
-  concat_node->values(0, first_node);
-  concat_node->values(1, second_node);
-  concat_node->axis(11);
-
-  auto concat_kernel = buildKernel<kernels::Concatenation>(concat_node);
-  ASSERT_THAT(concat_kernel, NotNull());
-
-  // tensors
-  checkTensor(concat_kernel->input(0), first_node);
-  checkTensor(concat_kernel->input(1), second_node);
-  checkTensor(concat_kernel->output(), concat_node);
-  // parameters
-  EXPECT_THAT(concat_kernel->params().axis, Eq(concat_node->axis()));
-  EXPECT_THAT(concat_kernel->params().activation, Eq(concat_node->fusedActivationFunction()));
-}
-
-// A CircleConv2D node has to be built into kernels::Conv2D; every attribute gets a distinct
-// value so that mixed-up parameters are detected.
-TEST_F(KernelBuilderTest, Conv2D)
-{
-  auto *data_node = createInputNode();
-  auto *filter_node = createInputNode();
-  auto *bias_node = createInputNode();
-
-  auto *conv_node = createNode<luci::CircleConv2D>();
-  conv_node->input(data_node);
-  conv_node->filter(filter_node);
-  conv_node->bias(bias_node);
-
-  conv_node->padding(luci::Padding::SAME);
-  conv_node->stride()->h(11);
-  conv_node->stride()->w(13);
-  conv_node->dilation()->h(17);
-  conv_node->dilation()->w(19);
-  conv_node->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto conv_kernel = buildKernel<kernels::Conv2D>(conv_node);
-  ASSERT_THAT(conv_kernel, NotNull());
-
-  // tensors
-  checkTensor(conv_kernel->input(), data_node);
-  checkTensor(conv_kernel->filter(), filter_node);
-  checkTensor(conv_kernel->bias(), bias_node);
-  checkTensor(conv_kernel->output(), conv_node);
-  // parameters
-  EXPECT_THAT(conv_kernel->params().padding, Eq(conv_node->padding()));
-  EXPECT_THAT(conv_kernel->params().stride_height, Eq(conv_node->stride()->h()));
-  EXPECT_THAT(conv_kernel->params().stride_width, Eq(conv_node->stride()->w()));
-  EXPECT_THAT(conv_kernel->params().dilation_height_factor, Eq(conv_node->dilation()->h()));
-  EXPECT_THAT(conv_kernel->params().dilation_width_factor, Eq(conv_node->dilation()->w()));
-  EXPECT_THAT(conv_kernel->params().activation, Eq(conv_node->fusedActivationFunction()));
-}
-
-// A CircleDepthToSpace node has to be built into kernels::DepthToSpace with matching
-// tensors and block size.
-TEST_F(KernelBuilderTest, DepthToSpace)
-{
-  auto *data_node = createInputNode();
-
-  auto *d2s_node = createNode<luci::CircleDepthToSpace>();
-  d2s_node->input(data_node);
-  d2s_node->block_size(11);
-
-  auto d2s_kernel = buildKernel<kernels::DepthToSpace>(d2s_node);
-  ASSERT_THAT(d2s_kernel, NotNull());
-
-  // tensors
-  checkTensor(d2s_kernel->input(), data_node);
-  checkTensor(d2s_kernel->output(), d2s_node);
-  // parameters
-  EXPECT_THAT(d2s_kernel->params().block_size, Eq(d2s_node->block_size()));
-}
-
-// A CircleDepthwiseConv2D node has to be built into kernels::DepthwiseConv2D; every
-// attribute gets a distinct value so that mixed-up parameters are detected.
-TEST_F(KernelBuilderTest, DepthwiseConv2D)
-{
-  auto *data_node = createInputNode();
-  auto *filter_node = createInputNode();
-  auto *bias_node = createInputNode();
-
-  auto *dwconv_node = createNode<luci::CircleDepthwiseConv2D>();
-  dwconv_node->input(data_node);
-  dwconv_node->filter(filter_node);
-  dwconv_node->bias(bias_node);
-
-  dwconv_node->padding(luci::Padding::SAME);
-  dwconv_node->depthMultiplier(11);
-  dwconv_node->stride()->h(13);
-  dwconv_node->stride()->w(17);
-  dwconv_node->dilation()->h(19);
-  dwconv_node->dilation()->w(23);
-  dwconv_node->fusedActivationFunction(luci::FusedActFunc::RELU);
-
-  auto dwconv_kernel = buildKernel<kernels::DepthwiseConv2D>(dwconv_node);
-  ASSERT_THAT(dwconv_kernel, NotNull());
-
-  // tensors
-  checkTensor(dwconv_kernel->input(), data_node);
-  checkTensor(dwconv_kernel->filter(), filter_node);
-  checkTensor(dwconv_kernel->bias(), bias_node);
-  checkTensor(dwconv_kernel->output(), dwconv_node);
-  // parameters
-  EXPECT_THAT(dwconv_kernel->params().padding, Eq(dwconv_node->padding()));
-  EXPECT_THAT(dwconv_kernel->params().depth_multiplier, Eq(dwconv_node->depthMultiplier()));
-  EXPECT_THAT(dwconv_kernel->params().stride_height, Eq(dwconv_node->stride()->h()));
-  EXPECT_THAT(dwconv_kernel->params().stride_width, Eq(dwconv_node->stride()->w()));
-  EXPECT_THAT(dwconv_kernel->params().dilation_height_factor, Eq(dwconv_node->dilation()->h()));
-  EXPECT_THAT(dwconv_kernel->params().dilation_width_factor, Eq(dwconv_node->dilation()->w()));
-  EXPECT_THAT(dwconv_kernel->params().activation, Eq(dwconv_node->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Div)
-{
- auto *input1 = createInputNode();
- auto *input2 = createInputNode();
-
- auto *op = createNode<luci::CircleDiv>();
- op->x(input1);
- op->y(input2);
-
- op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
- auto kernel = buildKernel<kernels::Div>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input1(), input1);
- checkTensor(kernel->input2(), input2);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Elu)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleElu>();
- op->features(input);
-
- auto kernel = buildKernel<kernels::Elu>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Exp)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleExp>();
- op->x(input);
-
- auto kernel = buildKernel<kernels::Exp>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Floor)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleFloor>();
- op->x(input);
-
- auto kernel = buildKernel<kernels::Floor>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, FloorDiv)
-{
- auto *x = createInputNode();
- auto *y = createInputNode();
-
- auto *op = createNode<luci::CircleFloorDiv>();
- op->x(x);
- op->y(y);
-
- auto kernel = buildKernel<kernels::FloorDiv>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->x(), x);
- checkTensor(kernel->y(), y);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Equal)
-{
- auto *x_input = createInputNode();
- auto *y_input = createInputNode();
-
- auto *op = createNode<luci::CircleEqual>();
- op->x(x_input);
- op->y(y_input);
-
- auto kernel = buildKernel<kernels::Equal>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->x(), x_input);
- checkTensor(kernel->y(), y_input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, FullyConnected)
-{
- auto *input = createInputNode();
- auto *weights = createInputNode();
- auto *bias = createInputNode();
-
- auto *op = createNode<luci::CircleFullyConnected>();
- op->input(input);
- op->weights(weights);
- op->bias(bias);
-
- op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
- auto kernel = buildKernel<kernels::FullyConnected>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->weights(), weights);
- checkTensor(kernel->bias(), bias);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Greater)
-{
- auto *x_input = createInputNode();
- auto *y_input = createInputNode();
-
- auto *op = createNode<luci::CircleGreater>();
- op->x(x_input);
- op->y(y_input);
-
- auto kernel = buildKernel<kernels::Greater>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->x(), x_input);
- checkTensor(kernel->y(), y_input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, GreaterEqual)
-{
- auto *x_input = createInputNode();
- auto *y_input = createInputNode();
-
- auto *op = createNode<luci::CircleGreaterEqual>();
- op->x(x_input);
- op->y(y_input);
-
- auto kernel = buildKernel<kernels::GreaterEqual>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->x(), x_input);
- checkTensor(kernel->y(), y_input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, InstanceNorm)
-{
- auto *input = createInputNode();
- auto *gamma = createInputNode();
- auto *beta = createInputNode();
-
- auto *op = createNode<luci::CircleInstanceNorm>();
- op->input(input);
- op->gamma(gamma);
- op->beta(beta);
-
- op->epsilon(1e-05);
- op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
- auto kernel = buildKernel<kernels::InstanceNorm>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->gamma(), gamma);
- checkTensor(kernel->beta(), beta);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().epsilon, Eq(op->epsilon()));
- EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, L2Normalize)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleL2Normalize>();
- op->x(input);
-
- op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
- auto kernel = buildKernel<kernels::L2Normalize>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, L2Pool2D)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleL2Pool2D>();
- op->value(input);
-
- op->padding(luci::Padding::SAME);
- op->filter()->h(11);
- op->filter()->w(13);
- op->stride()->h(17);
- op->stride()->w(19);
- op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
- auto kernel = buildKernel<kernels::L2Pool2D>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
- EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
- EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
- EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
- EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
- EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, LeakyRelu)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleLeakyRelu>();
- op->features(input);
-
- op->alpha(11.0f);
-
- auto kernel = buildKernel<kernels::LeakyRelu>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
-}
-
-TEST_F(KernelBuilderTest, Less)
-{
- auto *x_input = createInputNode();
- auto *y_input = createInputNode();
-
- auto *op = createNode<luci::CircleLess>();
- op->x(x_input);
- op->y(y_input);
-
- auto kernel = buildKernel<kernels::Less>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->x(), x_input);
- checkTensor(kernel->y(), y_input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, LessEqual)
-{
- auto *x_input = createInputNode();
- auto *y_input = createInputNode();
-
- auto *op = createNode<luci::CircleLessEqual>();
- op->x(x_input);
- op->y(y_input);
-
- auto kernel = buildKernel<kernels::LessEqual>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->x(), x_input);
- checkTensor(kernel->y(), y_input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, LocalResponseNormalization)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleLocalResponseNormalization>();
- op->input(input);
-
- op->radius(11);
- op->bias(13.0f);
- op->alpha(15.0f);
- op->beta(17.0f);
-
- auto kernel = buildKernel<kernels::LocalResponseNormalization>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().radius, Eq(op->radius()));
- EXPECT_THAT(kernel->params().bias, Eq(op->bias()));
- EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
- EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
-}
-
-TEST_F(KernelBuilderTest, LogicalAnd)
-{
- auto *input1 = createInputNode();
- auto *input2 = createInputNode();
-
- auto *op = createNode<luci::CircleLogicalAnd>();
- op->x(input1);
- op->y(input2);
-
- auto kernel = buildKernel<kernels::LogicalAnd>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input1(), input1);
- checkTensor(kernel->input2(), input2);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, LogicalNot)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleLogicalNot>();
- op->x(input);
-
- auto kernel = buildKernel<kernels::LogicalNot>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, LogicalOr)
-{
- auto *input1 = createInputNode();
- auto *input2 = createInputNode();
-
- auto *op = createNode<luci::CircleLogicalOr>();
- op->x(input1);
- op->y(input2);
-
- auto kernel = buildKernel<kernels::LogicalOr>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input1(), input1);
- checkTensor(kernel->input2(), input2);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Logistic)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleLogistic>();
- op->x(input);
-
- auto kernel = buildKernel<kernels::Logistic>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, LogSoftmax)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleLogSoftmax>();
- op->logits(input);
-
- auto kernel = buildKernel<kernels::LogSoftmax>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Maximum)
-{
- auto *input1 = createInputNode();
- auto *input2 = createInputNode();
-
- auto *op = createNode<luci::CircleMaximum>();
- op->x(input1);
- op->y(input2);
-
- auto kernel = buildKernel<kernels::Maximum>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input1(), input1);
- checkTensor(kernel->input2(), input2);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, MaxPool2D)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleMaxPool2D>();
- op->value(input);
-
- op->padding(luci::Padding::SAME);
- op->filter()->h(11);
- op->filter()->w(13);
- op->stride()->h(17);
- op->stride()->w(19);
- op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
- auto kernel = buildKernel<kernels::MaxPool2D>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
- EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
- EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
- EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
- EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
- EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Mean)
-{
- auto *input = createInputNode();
- auto *axes = createInputNode();
-
- auto *op = createNode<luci::CircleMean>();
- op->input(input);
- op->reduction_indices(axes);
-
- op->keep_dims(true);
-
- auto kernel = buildKernel<kernels::Mean>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->axes(), axes);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().keep_dims, Eq(op->keep_dims()));
-}
-
-TEST_F(KernelBuilderTest, Minimum)
-{
- auto *input1 = createInputNode();
- auto *input2 = createInputNode();
-
- auto *op = createNode<luci::CircleMinimum>();
- op->x(input1);
- op->y(input2);
-
- auto kernel = buildKernel<kernels::Minimum>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input1(), input1);
- checkTensor(kernel->input2(), input2);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Mul)
-{
- auto *input1 = createInputNode();
- auto *input2 = createInputNode();
-
- auto *op = createNode<luci::CircleMul>();
- op->x(input1);
- op->y(input2);
-
- op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
- auto kernel = buildKernel<kernels::Mul>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input1(), input1);
- checkTensor(kernel->input2(), input2);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Neg)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleNeg>();
- op->x(input);
-
- auto kernel = buildKernel<kernels::Neg>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, NotEqual)
-{
- auto *x_input = createInputNode();
- auto *y_input = createInputNode();
-
- auto *op = createNode<luci::CircleNotEqual>();
- op->x(x_input);
- op->y(y_input);
-
- auto kernel = buildKernel<kernels::NotEqual>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->x(), x_input);
- checkTensor(kernel->y(), y_input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, OneHot)
-{
- auto *indices = createInputNode();
- auto *depth = createInputNode();
- auto *on_value = createInputNode();
- auto *off_value = createInputNode();
- auto axis = 1;
-
- auto *op = createNode<luci::CircleOneHot>();
- op->indices(indices);
- op->depth(depth);
- op->on_value(on_value);
- op->off_value(off_value);
- op->axis(axis);
-
- auto kernel = buildKernel<kernels::OneHot>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->indices(), indices);
- checkTensor(kernel->depth(), depth);
- checkTensor(kernel->on_value(), on_value);
- checkTensor(kernel->off_value(), off_value);
- EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
-}
-
-TEST_F(KernelBuilderTest, Pad)
-{
- auto *input = createInputNode();
- auto *paddings = createInputNode();
-
- auto *op = createNode<luci::CirclePad>();
- op->input(input);
- op->paddings(paddings);
-
- auto kernel = buildKernel<kernels::Pad>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->paddings(), paddings);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, PadV2)
-{
- auto *input = createInputNode();
- auto *paddings = createInputNode();
- auto *constant_values = createInputNode();
-
- auto *op = createNode<luci::CirclePadV2>();
- op->input(input);
- op->paddings(paddings);
- op->constant_values(constant_values);
-
- auto kernel = buildKernel<kernels::PadV2>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->paddings(), paddings);
- checkTensor(kernel->constant_values(), constant_values);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Pow)
-{
- auto *input1 = createInputNode();
- auto *input2 = createInputNode();
-
- auto *op = createNode<luci::CirclePow>();
- op->x(input1);
- op->y(input2);
-
- auto kernel = buildKernel<kernels::Pow>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input1(), input1);
- checkTensor(kernel->input2(), input2);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, PRelu)
-{
- auto *input = createInputNode();
- auto *alpha = createInputNode();
-
- auto *op = createNode<luci::CirclePRelu>();
- op->input(input);
- op->alpha(alpha);
-
- auto kernel = buildKernel<kernels::PRelu>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->alpha(), alpha);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Relu)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleRelu>();
- op->features(input);
-
- auto kernel = buildKernel<kernels::Relu>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Relu6)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleRelu6>();
- op->features(input);
-
- auto kernel = buildKernel<kernels::Relu6>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Reshape)
-{
- auto *input = createInputNode();
- auto *shape = createInputNode();
-
- auto *op = createNode<luci::CircleReshape>();
- op->tensor(input);
- op->shape(shape);
-
- auto kernel = buildKernel<kernels::Reshape>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->shape(), shape);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, ResizeBilinear)
-{
- auto *input = createInputNode();
- auto *size = createInputNode();
-
- auto *op = createNode<luci::CircleResizeBilinear>();
- op->input(input);
- op->size(size);
- op->align_corners(true);
- op->half_pixel_centers(true);
-
- auto kernel = buildKernel<kernels::ResizeBilinear>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->size(), size);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
- EXPECT_THAT(kernel->params().half_pixel_centers, Eq(op->half_pixel_centers()));
-}
-
-TEST_F(KernelBuilderTest, ResizeNearestNeighbor)
-{
- auto *input = createInputNode();
- auto *size = createInputNode();
-
- auto *op = createNode<luci::CircleResizeNearestNeighbor>();
- op->input(input);
- op->size(size);
- op->align_corners(true);
-
- auto kernel = buildKernel<kernels::ResizeNearestNeighbor>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->size(), size);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
- // TODO currently half_pixel_centers are not implemented on CircleResizeNearestNeighbor
- // after adding, need to be updated.
-}
-
-TEST_F(KernelBuilderTest, ReverseV2)
-{
- auto *input = createInputNode();
- auto *axes = createInputNode();
-
- auto *op = createNode<luci::CircleReverseV2>();
- op->tensor(input);
- op->axis(axes);
-
- auto kernel = buildKernel<kernels::ReverseV2>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->axes(), axes);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Rsqrt)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleRsqrt>();
- op->x(input);
-
- auto kernel = buildKernel<kernels::Rsqrt>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Slice)
-{
- auto *input = createInputNode();
- auto *begin = createInputNode();
- auto *size = createInputNode();
-
- auto *op = createNode<luci::CircleSlice>();
- op->input(input);
- op->begin(begin);
- op->size(size);
-
- auto kernel = buildKernel<kernels::Slice>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->begin(), begin);
- checkTensor(kernel->size(), size);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Softmax)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleSoftmax>();
- op->logits(input);
-
- op->beta(11.0f);
-
- auto kernel = buildKernel<kernels::Softmax>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
-}
-
-TEST_F(KernelBuilderTest, SpaceToDepth)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleSpaceToDepth>();
- op->input(input);
-
- op->block_size(11);
-
- auto kernel = buildKernel<kernels::SpaceToDepth>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().block_size, op->block_size());
-}
-
-TEST_F(KernelBuilderTest, Split)
-{
- auto *axis = createInputNode();
- auto *input = createInputNode();
- auto *op = createNode<luci::CircleSplit>();
- auto *output1 = createNodeOut<luci::CircleSplitOut>(op, 0);
- auto *output2 = createNodeOut<luci::CircleSplitOut>(op, 1);
-
- op->split_dim(axis);
- op->input(input);
-
- op->num_split(2);
-
- auto kernel = buildKernel<kernels::Split>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->axis(), axis);
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(0), output1);
- checkTensor(kernel->output(1), output2);
-}
-
-TEST_F(KernelBuilderTest, SplitV)
-{
- auto *input = createInputNode();
- auto *size_splits = createInputNode();
- auto *axis = createInputNode();
- auto *op = createNode<luci::CircleSplitV>();
- auto *output0 = createNodeOut<luci::CircleSplitVOut>(op, 0);
- auto *output1 = createNodeOut<luci::CircleSplitVOut>(op, 1);
-
- op->input(input);
- op->size_splits(size_splits);
- op->split_dim(axis);
-
- op->num_split(2);
-
- auto kernel = buildKernel<kernels::SplitV>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->size_splits(), size_splits);
- checkTensor(kernel->axis(), axis);
- checkTensor(kernel->output(0), output0);
- checkTensor(kernel->output(1), output1);
-}
-
-TEST_F(KernelBuilderTest, Sqrt)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleSqrt>();
- op->x(input);
-
- auto kernel = buildKernel<kernels::Sqrt>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, SquaredDifference)
-{
- auto *input1 = createInputNode();
- auto *input2 = createInputNode();
-
- auto *op = createNode<luci::CircleSquaredDifference>();
- op->x(input1);
- op->y(input2);
-
- auto kernel = buildKernel<kernels::SquaredDifference>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input1(), input1);
- checkTensor(kernel->input2(), input2);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Squeeze)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleSqueeze>();
- op->input(input);
-
- op->squeeze_dims({11, 13});
-
- auto kernel = buildKernel<kernels::Squeeze>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().squeeze_dims, ElementsAreArray(op->squeeze_dims()));
-}
-
-TEST_F(KernelBuilderTest, StridedSlice)
-{
- auto *input = createInputNode();
- auto *begin = createInputNode();
- auto *end = createInputNode();
- auto *strides = createInputNode();
-
- auto *op = createNode<luci::CircleStridedSlice>();
- op->input(input);
- op->begin(begin);
- op->end(end);
- op->strides(strides);
-
- op->begin_mask(11);
- op->ellipsis_mask(13);
- op->end_mask(17);
- op->new_axis_mask(19);
- op->shrink_axis_mask(23);
-
- auto kernel = buildKernel<kernels::StridedSlice>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->begin(), begin);
- checkTensor(kernel->end(), end);
- checkTensor(kernel->strides(), strides);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().begin_mask, Eq(op->begin_mask()));
- EXPECT_THAT(kernel->params().ellipsis_mask, Eq(op->ellipsis_mask()));
- EXPECT_THAT(kernel->params().end_mask, Eq(op->end_mask()));
- EXPECT_THAT(kernel->params().new_axis_mask, Eq(op->new_axis_mask()));
- EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
-}
-
-TEST_F(KernelBuilderTest, Sub)
-{
- auto *input1 = createInputNode();
- auto *input2 = createInputNode();
-
- auto *op = createNode<luci::CircleSub>();
- op->x(input1);
- op->y(input2);
-
- op->fusedActivationFunction(luci::FusedActFunc::RELU);
-
- auto kernel = buildKernel<kernels::Sub>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input1(), input1);
- checkTensor(kernel->input2(), input2);
- checkTensor(kernel->output(), op);
- EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
-}
-
-TEST_F(KernelBuilderTest, Tanh)
-{
- auto *input = createInputNode();
-
- auto *op = createNode<luci::CircleTanh>();
- op->x(input);
-
- auto kernel = buildKernel<kernels::Tanh>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, Transpose)
-{
- auto *input = createInputNode();
- auto *perm = createInputNode();
-
- auto *op = createNode<luci::CircleTranspose>();
- op->a(input);
- op->perm(perm);
-
- auto kernel = buildKernel<kernels::Transpose>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->perm(), perm);
- checkTensor(kernel->output(), op);
-}
-
-TEST_F(KernelBuilderTest, TransposeConv)
-{
- auto *output_shape = createInputNode();
- auto *filter = createInputNode();
- auto *input = createInputNode();
- auto *bias = createInputNode();
-
- auto *op = createNode<luci::CircleTransposeConv>();
- op->inputSizes(output_shape);
- op->filter(filter);
- op->outBackprop(input);
- op->bias(bias);
-
- op->padding(luci::Padding::SAME);
- op->stride()->h(11);
- op->stride()->w(13);
-
- auto kernel = buildKernel<kernels::TransposeConv>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->output_shape(), output_shape);
- checkTensor(kernel->filter(), filter);
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(), op);
- checkTensor(kernel->bias(), bias);
- EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
- EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
- EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
-}
-
-TEST_F(KernelBuilderTest, Unpack)
-{
- auto *input = createInputNode();
- auto *op = createNode<luci::CircleUnpack>();
- auto *output1 = createNodeOut<luci::CircleUnpackOut>(op, 0);
- auto *output2 = createNodeOut<luci::CircleUnpackOut>(op, 1);
-
- op->value(input);
-
- op->num(2);
- op->axis(11);
-
- auto kernel = buildKernel<kernels::Unpack>(op);
- ASSERT_THAT(kernel, NotNull());
-
- checkTensor(kernel->input(), input);
- checkTensor(kernel->output(0), output1);
- checkTensor(kernel->output(1), output2);
- EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
-}
-
-TEST_F(KernelBuilderTest, NonExisting1_NEG)
-{
- auto *op = createNode<luci::CircleConst>();
- ASSERT_ANY_THROW(buildKernel<Kernel>(op));
-}
-
-TEST_F(KernelBuilderTest, NonExisting2_NEG)
-{
- auto *op = createNode<luci::CircleInput>();
- ASSERT_ANY_THROW(buildKernel<Kernel>(op));
-}
-
-TEST_F(KernelBuilderTest, NonExisting3_NEG)
-{
- auto *op = createNode<luci::CircleOutput>();
- ASSERT_ANY_THROW(buildKernel<Kernel>(op));
-}
-
-} // namespace
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "loader/KernelBuilderHelper.h"
-
-#include <luci/IR/Nodes/CircleOutput.h>
-
-namespace luci_interpreter
-{
-
-const Tensor *KernelBuilderHelper::getInputTensor(const loco::Node *node) const
-{
- const Tensor *tensor = _node_to_tensor.at(node);
- assert(tensor != nullptr);
- return tensor;
-}
-
-const Tensor *KernelBuilderHelper::getOptionalInputTensor(const loco::Node *node) const
-{
- if (dynamic_cast<const luci::CircleOutputExclude *>(node))
- {
- return nullptr;
- }
- return getInputTensor(node);
-}
-
-Tensor *KernelBuilderHelper::getOutputTensor(const loco::Node *node) const
-{
- Tensor *tensor = _node_to_tensor.at(node);
- assert(tensor != nullptr);
- return tensor;
-}
-
-std::vector<Tensor *>
-KernelBuilderHelper::getOutputTensors(const std::vector<const loco::Node *> &nodes) const
-{
- std::vector<Tensor *> tensors;
- tensors.reserve(nodes.size());
- for (const loco::Node *node : nodes)
- tensors.push_back(getOutputTensor(node));
- return tensors;
-}
-
-RuntimeGraph *KernelBuilderHelper::getRuntimeGraph(const loco::Graph *graph) const
-{
- RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
- assert(runtime_graph != nullptr);
- return runtime_graph;
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
-#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
-
-#include "core/Kernel.h"
-#include "core/RuntimeGraph.h"
-
-#include <loco/IR/Graph.h>
-#include <loco/IR/Node.h>
-
-#include <vector>
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-class KernelBuilderHelper
-{
-public:
- KernelBuilderHelper(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
- {
- }
-
-public:
- const Tensor *getInputTensor(const loco::Node *node) const;
- const Tensor *getOptionalInputTensor(const loco::Node *node) const;
-
- Tensor *getOutputTensor(const loco::Node *node) const;
- std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const;
-
- RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
-
-public:
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph() const
- {
- return _graph_to_runtime_graph;
- }
-
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor() const
- {
- return _node_to_tensor;
- }
-
-private:
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
- const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
-};
-
-template <typename CircleNodeOut>
-std::vector<const loco::Node *> collectOutputNodes(const loco::Node *node)
-{
- std::vector<const CircleNodeOut *> output_nodes;
- for (const loco::Node *loco_node : loco::succs(node))
- {
- output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
- }
- std::sort(output_nodes.begin(), output_nodes.end(),
- [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
- return node1->index() < node2->index();
- });
- return {output_nodes.cbegin(), output_nodes.cend()};
-}
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ModuleLoader.h"
-
-#include "GraphLoader.h"
-
-namespace luci_interpreter
-{
-
-ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
- RuntimeToIR &runtime_to_ir,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
- IMemoryManager *memory_manager)
- : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
- _node_to_tensor(node_to_tensor), _memory_manager(memory_manager)
-{
-}
-
-void ModuleLoader::load()
-{
- // Runtime graphs have to be created in advance, because they will be needed during the loading
- // process for control flow nodes.
- for (size_t i = 0; i < _module->size(); ++i)
- {
- _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager));
- }
- for (size_t i = 0; i < _module->size(); ++i)
- {
- const loco::Graph *graph = _module->graph(i);
- RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
- GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph,
- _node_to_tensor, _memory_manager);
- loader.loadTensors();
- loader.initInputOutputTensors();
- loader.loadOperators();
- }
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_MODULELOADER_H
-#define LUCI_INTERPRETER_LOADER_MODULELOADER_H
-
-#include "core/RuntimeModule.h"
-#include "loader/RuntimeToIR.h"
-#include "luci_interpreter/MemoryManager.h"
-
-#include <luci/IR/Module.h>
-
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-class ModuleLoader
-{
-public:
- ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
- RuntimeToIR &runtime_to_ir,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
- IMemoryManager *memory_manager);
-
- void load();
-
-private:
- IMemoryManager *_memory_manager;
- const luci::Module *_module;
- RuntimeModule *_runtime_module;
- RuntimeToIR &_runtime_to_ir;
- std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
- std::unordered_map<const loco::Graph *, RuntimeGraph *> _graph_to_runtime_graph;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_MODULELOADER_H
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
-#define LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
-
-#include "luci_interpreter/core/Tensor.h"
-
-#include <luci/IR/CircleNode.h>
-
-#include <unordered_map>
-
-namespace luci_interpreter
-{
-
-// Maps runtime entities back to IR entities. It is used to implement observing functionality.
-struct RuntimeToIR
-{
- std::unordered_map<const Tensor *, const luci::CircleNode *> tensor_to_node;
- std::unordered_map<const Kernel *, const luci::CircleNode *> kernel_to_node;
-};
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Add.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleAdd *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input1 = helper.getInputTensor(node->x());
- const Tensor *input2 = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- AddParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Add>(input1, input2, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/ArgMax.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleArgMax *>(circle_node);
- assert(node->arity() == 2);
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *axis = helper.getInputTensor(node->dimension());
- Tensor *output = helper.getOutputTensor(node);
-
- ArgMaxParams params{};
- params.output_type = node->output_type();
-
- return std::make_unique<kernels::ArgMax>(input, axis, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/AveragePool2D.h"
-#include <luci/Plan/CircleNodeExecutionPlan.h>
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->value());
- Tensor *output = helper.getOutputTensor(node);
-
- Pool2DParams params{};
- params.padding = node->padding();
- params.filter_height = node->filter()->h();
- params.filter_width = node->filter()->w();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.activation = node->fusedActivationFunction();
-
- // It is unknown what data will be stored in scratchpad tensor,
- // using UINT8 as a most general option
- auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
- scratchpad->set_observable(false);
- scratchpad->set_data_buffer(nullptr);
- // If node has execution plan then read memory offsets for scratchpad temporary tensor
- // from the beginning of shared memory buffer.
- // Used in Static Memory Manager.
- // TODO move tensors offset initialization to one place
- if (luci::has_execution_plan(node))
- {
- const auto execution_plan = luci::get_execution_plan(node);
- // Check whether the offset for the current CircleConv2D temporary was found.
- if (execution_plan.offsets().size() > 1)
- // If this is true, then we keep this offset in scratchpad.
- scratchpad->set_offset(execution_plan.offsets().at(1));
- }
- Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
-
- return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/BatchMatMul.h"
-#include <luci/Plan/CircleNodeExecutionPlan.h>
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *lhs = helper.getInputTensor(node->x());
- const Tensor *rhs = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- auto lhs_scratchpad =
- std::make_unique<Tensor>(lhs->element_type(), Shape({}), AffineQuantization{}, "");
- lhs_scratchpad->set_observable(false);
- lhs_scratchpad->set_data_buffer(nullptr);
- auto rhs_scratchpad =
- std::make_unique<Tensor>(rhs->element_type(), Shape({}), AffineQuantization{}, "");
- rhs_scratchpad->set_observable(false);
- rhs_scratchpad->set_data_buffer(nullptr);
- // If node has execution plan then read memory offsets for scratchpad temporary tensor
- // from the beginning of shared memory buffer.
- // Used in Static Memory Manager.
- // TODO move tensors offset initialization to one place
- if (luci::has_execution_plan(node))
- {
- const auto execution_plan = luci::get_execution_plan(node);
- // Check whether the offset for the current BatchMatMul temporary was found.
- if (execution_plan.offsets().size() > 1)
- {
- assert(execution_plan.offsets().size() == 3);
-
- // If this is true, then we keep this offset in scratchpad.
- lhs_scratchpad->set_offset(execution_plan.offsets().at(1));
- rhs_scratchpad->set_offset(execution_plan.offsets().at(2));
- }
- }
- Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad));
- Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad));
-
- BatchMatMulParams params;
- params.adj_x = node->adj_x();
- params.adj_y = node->adj_y();
-
- return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/BatchToSpaceND.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleBatchToSpaceND *>(circle_node);
- assert(node->arity() == 3);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *block_shape = helper.getInputTensor(node->block_shape());
- const Tensor *crops = helper.getInputTensor(node->crops());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
-#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
-
-#include "loader/KernelBuilderHelper.h"
-
-#include "luci/IR/CircleNodes.h"
-
-namespace luci_interpreter
-{
-
-#define REGISTER_KERNEL(name) \
- std::unique_ptr<Kernel> build_kernel_Circle##name(const luci::CircleNode *circle_node, \
- KernelBuilderHelper &helper);
-
-#include "KernelsToBuild.lst"
-
-#undef REGISTER_KERNEL
-
-} // namespace luci_interpreter
-
-#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Cast.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleCast *>(circle_node);
-
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->x());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Cast>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Concatenation.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleConcatenation *>(circle_node);
- std::vector<const Tensor *> inputs(node->numValues());
- for (uint32_t i = 0; i < node->numValues(); ++i)
- {
- inputs[i] = helper.getInputTensor(node->values(i));
- }
- Tensor *output = helper.getOutputTensor(node);
-
- ConcatenationParams params{};
- params.axis = node->axis();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Conv2D.h"
-#include <luci/Plan/CircleNodeExecutionPlan.h>
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleConv2D *>(circle_node);
- assert(node->arity() == 3);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *filter = helper.getInputTensor(node->filter());
- const Tensor *bias = helper.getOptionalInputTensor(node->bias());
- Tensor *output = helper.getOutputTensor(node);
-
- // It is unknown what data will be stored in scratchpad tensor,
- // using UINT8 as a most general option
- auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
- scratchpad->set_observable(false);
- scratchpad->set_data_buffer(nullptr);
- // If node has execution plan then read memory offsets for scratchpad temporary tensor
- // from the beginning of shared memory buffer.
- // Used in Static Memory Manager.
- // TODO move tensors offset initialization to one place
- if (luci::has_execution_plan(node))
- {
- const auto execution_plan = luci::get_execution_plan(node);
- // Check whether the offset for the current CircleConv2D temporary was found.
- if (execution_plan.offsets().size() > 1)
- // If this is true, then we keep this offset in scratchpad.
- scratchpad->set_offset(execution_plan.offsets().at(1));
- }
- Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
-
- Conv2DParams params{};
- params.padding = node->padding();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.dilation_height_factor = node->dilation()->h();
- params.dilation_width_factor = node->dilation()->w();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/DepthToSpace.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleDepthToSpace *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->input());
- Tensor *output = helper.getOutputTensor(node);
-
- DepthToSpaceParams params{};
- params.block_size = node->block_size();
-
- return std::make_unique<kernels::DepthToSpace>(input, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/DepthwiseConv2D.h"
-#include <luci/Plan/CircleNodeExecutionPlan.h>
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
- assert(node->arity() == 3);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *filter = helper.getInputTensor(node->filter());
- const Tensor *bias = helper.getInputTensor(node->bias());
- Tensor *output = helper.getOutputTensor(node);
-
- DepthwiseConv2DParams params{};
- params.padding = node->padding();
- params.depth_multiplier = node->depthMultiplier();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.dilation_height_factor = node->dilation()->h();
- params.dilation_width_factor = node->dilation()->w();
- params.activation = node->fusedActivationFunction();
-
- // It is unknown what data will be stored in scratchpad tensor,
- // using UINT8 as a most general option
- auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
- scratchpad->set_observable(false);
- scratchpad->set_data_buffer(nullptr);
- // If node has execution plan then read memory offsets for scratchpad temporary tensor
- // from the beginning of shared memory buffer.
- // Used in Static Memory Manager.
- // TODO move tensors offset initialization to one place
- if (luci::has_execution_plan(node))
- {
- const auto execution_plan = luci::get_execution_plan(node);
- // Check whether the offset for the current CircleConv2D temporary was found.
- if (execution_plan.offsets().size() > 1)
- // If this is true, then we keep this offset in scratchpad.
- scratchpad->set_offset(execution_plan.offsets().at(1));
- }
- Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
-
- return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Dequantize.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleDequantize *>(circle_node);
-
- const Tensor *input = helper.getInputTensor(node->input());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Dequantize>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Div.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleDiv *>(circle_node);
- assert(node->arity() == 2);
- const Tensor *input1 = helper.getInputTensor(node->x());
- const Tensor *input2 = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- DivParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Div>(input1, input2, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Elu.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleElu *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->features());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Elu>(input, output);
-}
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Equal.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-
-{
- const auto *node = loco::must_cast<const luci::CircleEqual *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *x = helper.getInputTensor(node->x());
- const Tensor *y = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Equal>(x, y, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Exp.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleExp *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->x());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Exp>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/ExpandDims.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleExpandDims *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *axis = helper.getInputTensor(node->axis());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::ExpandDims>(input, axis, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Fill.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleFill(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleFill *>(circle_node);
- assert(node->arity() == 2);
-
- const auto dims = helper.getInputTensor(node->dims());
- const auto value = helper.getInputTensor(node->value());
- auto output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Fill>(dims, value, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Floor.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleFloor *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->x());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Floor>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/FloorDiv.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleFloorDiv *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *x = helper.getInputTensor(node->x());
- const Tensor *y = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::FloorDiv>(x, y, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/FullyConnected.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleFullyConnected *>(circle_node);
- assert(node->arity() == 3);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *weights = helper.getInputTensor(node->weights());
- const Tensor *bias = helper.getOptionalInputTensor(node->bias());
- Tensor *output = helper.getOutputTensor(node);
-
- FullyConnectedParams params{};
- params.activation = node->fusedActivationFunction();
- params.keep_num_dims = node->keep_num_dims();
-
- return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Gather.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleGather *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *params = helper.getInputTensor(node->params());
- const Tensor *indices = helper.getInputTensor(node->indices());
- Tensor *output = helper.getOutputTensor(node);
-
- GatherParams gparams{};
- gparams.axis = node->axis();
- // TODO support batch_dims
- gparams.batch_dims = 0;
-
- return std::make_unique<kernels::Gather>(params, indices, output, gparams);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Greater.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleGreater *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *x = helper.getInputTensor(node->x());
- const Tensor *y = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Greater>(x, y, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/GreaterEqual.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleGreaterEqual *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *x = helper.getInputTensor(node->x());
- const Tensor *y = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::GreaterEqual>(x, y, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/If.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleIf *>(circle_node);
- auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
- assert(node->arity() == 1 + node->input_count());
- assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
-
- const Tensor *cond = helper.getInputTensor(node->cond());
- std::vector<const Tensor *> inputs(node->input_count());
- for (uint32_t i = 0; i < node->input_count(); ++i)
- {
- inputs[i] = helper.getInputTensor(node->input(i));
- }
- std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
-
- RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph());
- RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph());
-
- return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
- else_graph);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/InstanceNorm.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleInstanceNorm *>(circle_node);
- assert(node->arity() == 3);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *gamma = helper.getInputTensor(node->gamma());
- const Tensor *beta = helper.getInputTensor(node->beta());
-
- Tensor *output = helper.getOutputTensor(node);
-
- InstanceNormParams params{};
- params.epsilon = node->epsilon();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/L2Normalize.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleL2Normalize *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->x());
- Tensor *output = helper.getOutputTensor(node);
-
- L2NormParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::L2Normalize>(input, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/L2Pool2D.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleL2Pool2D *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->value());
- Tensor *output = helper.getOutputTensor(node);
-
- Pool2DParams params{};
- params.padding = node->padding();
- params.filter_height = node->filter()->h();
- params.filter_width = node->filter()->w();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::L2Pool2D>(input, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LeakyRelu.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleLeakyRelu *>(circle_node);
- assert(node->arity() == 1);
- const Tensor *input = helper.getInputTensor(node->features());
- Tensor *output = helper.getOutputTensor(node);
-
- LeakyReluParams params{};
- params.alpha = node->alpha();
-
- return std::make_unique<kernels::LeakyRelu>(input, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Less.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleLess *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *x = helper.getInputTensor(node->x());
- const Tensor *y = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Less>(x, y, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LessEqual.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleLessEqual *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *x = helper.getInputTensor(node->x());
- const Tensor *y = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::LessEqual>(x, y, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LocalResponseNormalization.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel>
-build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
- assert(node->arity() == 1);
- const Tensor *input = helper.getInputTensor(node->input());
- Tensor *output = helper.getOutputTensor(node);
-
- LocalResponseNormalizationParams params{};
- params.radius = node->radius();
- params.bias = node->bias();
- params.alpha = node->alpha();
- params.beta = node->beta();
-
- return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LogSoftmax.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleLogSoftmax *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->logits());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::LogSoftmax>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LogicalAnd.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleLogicalAnd *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input1 = helper.getInputTensor(node->x());
- const Tensor *input2 = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::LogicalAnd>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LogicalNot.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleLogicalNot *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->x());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::LogicalNot>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/LogicalOr.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleLogicalOr *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input1 = helper.getInputTensor(node->x());
- const Tensor *input2 = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::LogicalOr>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Logistic.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleLogistic *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->x());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Logistic>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/MaxPool2D.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleMaxPool2D *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->value());
- Tensor *output = helper.getOutputTensor(node);
-
- Pool2DParams params{};
- params.padding = node->padding();
- params.filter_height = node->filter()->h();
- params.filter_width = node->filter()->w();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::MaxPool2D>(input, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Maximum.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleMaximum *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input1 = helper.getInputTensor(node->x());
- const Tensor *input2 = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Maximum>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Mean.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleMean *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *axes = helper.getInputTensor(node->reduction_indices());
- Tensor *output = helper.getOutputTensor(node);
-
- auto temp_index_unique =
- std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
- temp_index_unique->set_observable(false);
- temp_index_unique->set_data_buffer(nullptr);
- Tensor *temp_index =
- helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
-
- auto resolved_axes_unique =
- std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
- resolved_axes_unique->set_observable(false);
- resolved_axes_unique->set_data_buffer(nullptr);
- Tensor *resolved_axes =
- helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
-
- auto temp_sum_unique =
- std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
- temp_sum_unique->set_observable(false);
- temp_sum_unique->set_data_buffer(nullptr);
- Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique));
-
- ReducerParams params{};
- params.keep_dims = node->keep_dims();
-
- return std::make_unique<kernels::Mean>(input, axes, output, temp_index, resolved_axes, temp_sum,
- params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Minimum.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleMinimum *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input1 = helper.getInputTensor(node->x());
- const Tensor *input2 = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Minimum>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/MirrorPad.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleMirrorPad *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *paddings = helper.getInputTensor(node->paddings());
- Tensor *output = helper.getOutputTensor(node);
-
- MirrorPadParams params{};
- params.mode = node->mode();
-
- return std::make_unique<kernels::MirrorPad>(input, paddings, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Mul.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleMul *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input1 = helper.getInputTensor(node->x());
- const Tensor *input2 = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- MulParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Mul>(input1, input2, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Neg.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleNeg *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->x());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Neg>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/NotEqual.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleNotEqual *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *x = helper.getInputTensor(node->x());
- const Tensor *y = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::NotEqual>(x, y, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/OneHot.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleOneHot *>(circle_node);
- assert(node->arity() == 4);
-
- const Tensor *indices = helper.getInputTensor(node->indices());
- const Tensor *depth = helper.getInputTensor(node->depth());
- const Tensor *on_value = helper.getInputTensor(node->on_value());
- const Tensor *off_value = helper.getInputTensor(node->off_value());
- Tensor *output = helper.getOutputTensor(node);
-
- OneHotParams params{};
- params.axis = node->axis();
-
- return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/PRelu.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CirclePRelu *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *alpha = helper.getInputTensor(node->alpha());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::PRelu>(input, alpha, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Pack.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CirclePack *>(circle_node);
- assert(node->arity() == node->values_count());
-
- std::vector<const Tensor *> inputs(node->values_count());
- for (uint32_t i = 0; i < node->values_count(); ++i)
- {
- inputs[i] = helper.getInputTensor(node->values(i));
- }
- Tensor *output = helper.getOutputTensor(node);
-
- PackParams params{};
- params.axis = node->axis();
- params.values_count = node->values_count();
-
- return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Pad.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CirclePad *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *paddings = helper.getInputTensor(node->paddings());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Pad>(input, paddings, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/PadV2.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CirclePadV2 *>(circle_node);
- assert(node->arity() == 3);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *paddings = helper.getInputTensor(node->paddings());
- const Tensor *constant_values = helper.getInputTensor(node->constant_values());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Pow.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CirclePow *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input1 = helper.getInputTensor(node->x());
- const Tensor *input2 = helper.getInputTensor(node->y());
-
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Pow>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Quantize.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleQuantize *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->input());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Quantize>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Relu.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleRelu *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->features());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Relu>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Relu6.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleRelu6 *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->features());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Relu6>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Reshape.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleReshape *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input = helper.getInputTensor(node->tensor());
- const Tensor *shape = helper.getInputTensor(node->shape());
- Tensor *output = helper.getOutputTensor(node);
-
- // NOTE 'newShape' attribute is ignored.
- return std::make_unique<kernels::Reshape>(input, shape, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/ResizeBilinear.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleResizeBilinear *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *size = helper.getInputTensor(node->size());
- Tensor *output = helper.getOutputTensor(node);
-
- ResizeBilinearParams params{};
- params.align_corners = node->align_corners();
- params.half_pixel_centers = node->half_pixel_centers();
-
- return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/ResizeNearestNeighbor.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel>
-build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *size = helper.getInputTensor(node->size());
- Tensor *output = helper.getOutputTensor(node);
-
- ResizeNearestNeighborParams params{};
- params.align_corners = node->align_corners();
- // TODO update half_pixel_centers after CircleResizeNearestNeighbor updated
- // Current CircleResizeNearestNeighbor don't have half_pixel_centers.
- // default value on current is false.
- // it need to be updated when CircleResizeNearestNeighbor updated.
- params.half_pixel_centers = false;
-
- return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/ReverseV2.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleReverseV2 *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input = helper.getInputTensor(node->tensor());
- const Tensor *axes = helper.getInputTensor(node->axis());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::ReverseV2>(input, axes, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Rsqrt.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleRsqrt *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->x());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Rsqrt>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/SVDF.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSVDF *>(circle_node);
- assert(node->arity() == 5);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *feature = helper.getInputTensor(node->weight_feature());
- const Tensor *time = helper.getInputTensor(node->weight_time());
- const Tensor *bias = helper.getOptionalInputTensor(node->bias());
- const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state());
- Tensor *output = helper.getOutputTensor(node);
-
- auto scratchpad_tensor = std::make_unique<Tensor>(input_activation_state->element_type(),
- Shape({}), AffineQuantization{}, "");
- scratchpad_tensor->set_observable(false);
- scratchpad_tensor->set_data_buffer(nullptr);
- Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
- DataType data_type = input->element_type() == DataType::S8 ? DataType::S32 : DataType::FLOAT32;
-
- scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
- scratchpad_tensor->set_observable(false);
- scratchpad_tensor->set_data_buffer(nullptr);
- Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
- if (data_type == DataType::FLOAT32 &&
- (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8))
- {
- data_type = feature->element_type();
- }
-
- scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
- scratchpad_tensor->set_observable(false);
- scratchpad_tensor->set_data_buffer(nullptr);
- Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
- data_type = DataType::FLOAT32;
-
- scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
- scratchpad_tensor->set_observable(false);
- scratchpad_tensor->set_data_buffer(nullptr);
- Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
- scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
- scratchpad_tensor->set_observable(false);
- scratchpad_tensor->set_data_buffer(nullptr);
- Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
- scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
- scratchpad_tensor->set_observable(false);
- scratchpad_tensor->set_data_buffer(nullptr);
- Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
- scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
- scratchpad_tensor->set_observable(false);
- scratchpad_tensor->set_data_buffer(nullptr);
- Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
-
- SVDFParams params{};
- params.activation = node->fusedActivationFunction();
- params.svdf_rank = node->svdf_rank();
- params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs();
-
- return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output,
- tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Shape.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node);
- assert(node->arity() == 1);
-
- const auto input = helper.getInputTensor(node->input());
- auto output = helper.getOutputTensor(node);
-
- ShapeParams shape_params{};
- shape_params.out_type = node->out_type();
-
- return std::make_unique<kernels::ShapeKernel>(input, output, shape_params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Slice.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSlice *>(circle_node);
- assert(node->arity() == 3);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *begin = helper.getInputTensor(node->begin());
- const Tensor *size = helper.getInputTensor(node->size());
-
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Slice>(input, begin, size, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Softmax.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSoftmax *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->logits());
- Tensor *output = helper.getOutputTensor(node);
-
- SoftmaxParams params{};
- params.beta = node->beta();
-
- return std::make_unique<kernels::Softmax>(input, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/SpaceToBatchND.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSpaceToBatchND *>(circle_node);
- assert(node->arity() == 3);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *block_shape = helper.getInputTensor(node->block_shape());
- const Tensor *paddings = helper.getInputTensor(node->paddings());
-
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/SpaceToDepth.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSpaceToDepth *>(circle_node);
- assert(node->arity() == 1);
- const Tensor *input = helper.getInputTensor(node->input());
-
- Tensor *output = helper.getOutputTensor(node);
-
- SpaceToDepthParams params{};
- params.block_size = node->block_size();
-
- return std::make_unique<kernels::SpaceToDepth>(input, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Split.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSplit *>(circle_node);
- auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
- assert(node->arity() == 2);
- assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
-
- const Tensor *axis = helper.getInputTensor(node->split_dim());
- const Tensor *input = helper.getInputTensor(node->input());
- std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
-
- // NOTE 'num_splits' attribute is ignored.
- return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/SplitV.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSplitV *>(circle_node);
- auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node);
- assert(node->arity() == 3);
- assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *sizes_data = helper.getInputTensor(node->size_splits());
- const Tensor *axis = helper.getInputTensor(node->split_dim());
- std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
-
- // NOTE 'num_splits' attribute is ignored.
- return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(outputs));
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Sqrt.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSqrt *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->x());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Sqrt>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Square.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSquare *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->x());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Square>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/SquaredDifference.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSquaredDifference *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input1 = helper.getInputTensor(node->x());
- const Tensor *input2 = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Squeeze.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSqueeze *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->input());
- Tensor *output = helper.getOutputTensor(node);
-
- SqueezeParams params{};
- params.squeeze_dims = node->squeeze_dims();
-
- return std::make_unique<kernels::Squeeze>(input, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/StridedSlice.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleStridedSlice *>(circle_node);
- assert(node->arity() == 4);
-
- const Tensor *input = helper.getInputTensor(node->input());
- const Tensor *begin = helper.getInputTensor(node->begin());
- const Tensor *end = helper.getInputTensor(node->end());
- const Tensor *strides = helper.getInputTensor(node->strides());
-
- Tensor *output = helper.getOutputTensor(node);
-
- StridedSliceParams params{};
- params.begin_mask = node->begin_mask();
- params.ellipsis_mask = node->ellipsis_mask();
- params.end_mask = node->end_mask();
- params.new_axis_mask = node->new_axis_mask();
- params.shrink_axis_mask = node->shrink_axis_mask();
-
- return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Sub.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleSub *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input1 = helper.getInputTensor(node->x());
- const Tensor *input2 = helper.getInputTensor(node->y());
- Tensor *output = helper.getOutputTensor(node);
-
- SubParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Sub>(input1, input2, output, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Tanh.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node);
- assert(node->arity() == 1);
-
- const Tensor *input = helper.getInputTensor(node->x());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Tanh>(input, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Transpose.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleTranspose *>(circle_node);
- assert(node->arity() == 2);
-
- const Tensor *input = helper.getInputTensor(node->a());
- const Tensor *perm = helper.getInputTensor(node->perm());
- Tensor *output = helper.getOutputTensor(node);
-
- return std::make_unique<kernels::Transpose>(input, perm, output);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/TransposeConv.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleTransposeConv *>(circle_node);
- assert(node->arity() == 4);
-
- const Tensor *input_sizes = helper.getInputTensor(node->inputSizes());
- const Tensor *filter = helper.getInputTensor(node->filter());
- const Tensor *out_backprop = helper.getInputTensor(node->outBackprop());
- const Tensor *bias = helper.getOptionalInputTensor(node->bias());
-
- Tensor *output = helper.getOutputTensor(node);
-
- DataType scratch_data_type =
- helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
-
- auto scratch_tensor =
- std::make_unique<Tensor>(scratch_data_type, Shape({}), AffineQuantization{}, "");
- scratch_tensor->set_observable(false);
- scratch_tensor->set_data_buffer(nullptr);
- Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor));
-
- TransposeConvParams params{};
- params.padding = node->padding();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
-
- return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
- tmp, params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/Unpack.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleUnpack *>(circle_node);
- auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
- assert(node->arity() == 1);
- assert(output_nodes.size() == static_cast<size_t>(node->num()));
-
- const Tensor *input = helper.getInputTensor(node->value());
- std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
-
- UnpackParams params{};
- params.axis = node->axis();
-
- // NOTE 'num' attribute is ignored.
- return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Builders.h"
-
-#include "kernels/While.h"
-
-namespace luci_interpreter
-{
-
-std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node,
- KernelBuilderHelper &helper)
-{
- const auto *node = loco::must_cast<const luci::CircleWhile *>(circle_node);
-
- auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
- assert(node->arity() == node->input_count());
- assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
-
- std::vector<const Tensor *> inputs(node->input_count());
- for (uint32_t i = 0; i < node->input_count(); ++i)
- {
- inputs[i] = helper.getInputTensor(node->input(i));
- }
- std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
-
- RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph());
- RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph());
-
- return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph,
- body_graph);
-}
-
-} // namespace luci_interpreter
+++ /dev/null
-require(luci-interpreter)
+++ /dev/null
-cmake_minimum_required(VERSION 3.10)
-project(luci_interpreter_micro_standalone)
-
-# Add fake target, so nothing is build
-set(BUILD_WHITELIST "dummy")
-
-add_subdirectory(${NNAS_ROOT}/infra/nncc ${CMAKE_CURRENT_BINARY_DIR}/nncc)
-
-set(ONE_COMPILER_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/compiler")
-nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
-
-include_directories(${FlatBuffersSource_DIR}/include)
-
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/loco ${CMAKE_CURRENT_BINARY_DIR}/loco)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/angkor ${CMAKE_CURRENT_BINARY_DIR}/angkor)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/oops ${CMAKE_CURRENT_BINARY_DIR}/oops)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-str ${CMAKE_CURRENT_BINARY_DIR}/pepper-str)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo ${CMAKE_CURRENT_BINARY_DIR}/logo)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo-core ${CMAKE_CURRENT_BINARY_DIR}/logo-core)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes-std ${CMAKE_CURRENT_BINARY_DIR}/hermes-std)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes ${CMAKE_CURRENT_BINARY_DIR}/hermes)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-strcast ${CMAKE_CURRENT_BINARY_DIR}/pepper-strcast)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/foder ${CMAKE_CURRENT_BINARY_DIR}/foder)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/mio-circle04 ${CMAKE_CURRENT_BINARY_DIR}/mio-circle04)
-
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/locomotiv ${CMAKE_CURRENT_BINARY_DIR}/locomotiv)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/lang ${CMAKE_CURRENT_BINARY_DIR}/luci/lang)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/import ${CMAKE_CURRENT_BINARY_DIR}/luci/import)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/profile ${CMAKE_CURRENT_BINARY_DIR}/luci/profile)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/env ${CMAKE_CURRENT_BINARY_DIR}/luci/env)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/plan ${CMAKE_CURRENT_BINARY_DIR}/luci/plan)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/log ${CMAKE_CURRENT_BINARY_DIR}/luci/log)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/logex ${CMAKE_CURRENT_BINARY_DIR}/luci/logex)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/locop ${CMAKE_CURRENT_BINARY_DIR}/locop)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/pp ${CMAKE_CURRENT_BINARY_DIR}/pp)
-
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-micro/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
"$<TARGET_FILE:luci_eval_driver>"
${LUCI_PASS_VALUE_TESTS}
)
+
+if(ONE_UBUNTU_CODENAME_JAMMY)
+ add_test(NAME luci_pass_value_210_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/eval_driver.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_PASS_VALUE_TESTS}
+ )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
addeval(Net_Conv_Add_Mul_002 fuse_batchnorm_with_conv)
addeval(Net_Conv_Min_Max_000 transform_min_max_to_relu6)
addeval(Net_Conv_Min_Relu_000 transform_min_relu_to_relu6)
+addeval(Net_Conv_PReluGraph_000 fuse_prelu)
addeval(Net_Conv_Relu6_000 fuse_activation_function)
addeval(Net_Densify_Add_000 fold_densify)
addeval(Net_Dequantize_Add_000 fold_dequantize)
addeval(Net_DwConv_BN_000 fuse_batchnorm_with_dwconv)
addeval(Net_DwConv_BN_001 fuse_batchnorm_with_dwconv)
+addeval(Net_FullyConnected_Add_000 fold_fully_connected)
addeval(Net_Reshape_Neg_000 forward_reshape_to_unaryop)
addeval(Net_Reshape_Reshape_000 remove_redundant_reshape)
addeval(Net_Squeeze_Squeeze_000 substitute_squeeze_to_reshape)
addeval(Net_TConv_BN_002 fuse_batchnorm_with_tconv)
addeval(Net_TConv_BN_003 fuse_batchnorm_with_tconv)
addeval(Net_TConv_BN_004 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_005 fuse_batchnorm_with_tconv)
addeval(Net_InstanceNorm_001 fuse_instnorm)
addeval(Net_InstanceNorm_002 fuse_instnorm)
addeval(Net_InstanceNorm_003 fuse_instnorm)
addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice)
addeval(FullyConnected_007 replace_non_const_fc_with_batch_matmul)
+addeval(Net_Transpose_Add_000 forward_transpose_op)
+addeval(Net_Transpose_Abs_000 forward_transpose_op)
+addeval(UnidirectionalSequenceLSTM_003 unroll_unidirseqlstm)
+addeval(UnidirectionalSequenceLSTM_004 unroll_unidirseqlstm)
# test for limited support for FLOAT16
addeval(Net_Dequantize_Add_000 fold_dequantize)
)
endif()
+ if(ONE_UBUNTU_CODENAME_JAMMY)
+ add_test(NAME luci_value_210_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_VALUE_TESTS}
+ )
+
+ if(DEFINED LUCI_VALUE_TESTS_TOL)
+ add_test(NAME luci_value_tol_210_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverifytol.sh"
+ "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+ "$<TARGET_FILE:luci_eval_driver>"
+ ${LUCI_VALUE_TESTS_TOL}
+ )
+ endif()
+ endif(ONE_UBUNTU_CODENAME_JAMMY)
+
else(NOT CMAKE_CROSSCOMPILING)
# NOTE target test is carried out using reference input/output data from host
# test results. this is because it would be difficult to prepare
rtolf32 = 1e-5
atolf32 = 1e-5
+# NOTE integer outputs reuse the f32 tolerances, truncated by int() (e.g. 8e-3 -> 0)
+rtolint = 0
+atolint = 0
try:
if args.rtolf32 != None:
rtolf32 = float(args.rtolf32)
+ rtolint = int(rtolf32)
if args.atolf32 != None:
atolf32 = float(args.atolf32)
+ atolint = int(atolf32)
except ValueError:
print("rtolf32 or atolf32 is not a number")
quit(128)
intp_output_data = interpreter.get_tensor(output_tensor)
try:
if output_details["dtype"] == np.uint8:
- if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ if np.allclose(
+ luci_output_data, intp_output_data, rtol=rtolint,
+ atol=atolint) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
output_dtype = "uint8"
if np.allclose(
luci_output_data, intp_output_data, rtol=rtolf32,
atol=atolf32) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
output_dtype = "float32"
elif output_details["dtype"] == np.int64:
- if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ if np.allclose(
+ luci_output_data, intp_output_data, rtol=rtolint,
+ atol=atolint) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
output_dtype = "int64"
elif output_details["dtype"] == np.int32:
- if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ if np.allclose(
+ luci_output_data, intp_output_data, rtol=rtolint,
+ atol=atolint) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
output_dtype = "int32"
elif output_details["dtype"] == np.int16:
- if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ if np.allclose(
+ luci_output_data, intp_output_data, rtol=rtolint,
+ atol=atolint) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
output_dtype = "int16"
elif output_details["dtype"] == np.bool_:
if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+ print("intp_output_data", intp_output_data)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
output_dtype = "bool"
rtolf32 = 1e-5
atolf32 = 1e-5
+# NOTE integer outputs reuse the f32 tolerances, truncated by int() (e.g. 8e-3 -> 0)
+rtolint = 0
+atolint = 0
try:
if args.rtolf32 != None:
rtolf32 = float(args.rtolf32)
+ rtolint = int(rtolf32)
if args.atolf32 != None:
atolf32 = float(args.atolf32)
+ atolint = int(atolf32)
except ValueError:
print("rtolf32 or atolf32 is not a number")
quit(128)
try:
if output_dtype == np.uint8:
if np.allclose(
- luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+ luci_output_data, luci_output_data_ref, rtol=rtolint,
+ atol=atolint) == False:
+ print("luci_output_data_ref", luci_output_data_ref)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + circle_model_ref +
" does not match with " + circle_model)
elif output_dtype == np.float32:
if np.allclose(
luci_output_data, luci_output_data_ref, rtol=rtolf32,
atol=atolf32) == False:
+ print("luci_output_data_ref", luci_output_data_ref)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + circle_model_ref +
" does not match with " + circle_model)
elif output_dtype == np.int64:
if np.allclose(
- luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+ luci_output_data, luci_output_data_ref, rtol=rtolint,
+ atol=atolint) == False:
+ print("luci_output_data_ref", luci_output_data_ref)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + circle_model_ref +
" does not match with " + circle_model)
elif output_dtype == np.int32:
if np.allclose(
- luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+ luci_output_data, luci_output_data_ref, rtol=rtolint,
+ atol=atolint) == False:
+ print("luci_output_data_ref", luci_output_data_ref)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + circle_model_ref +
" does not match with " + circle_model)
elif output_dtype == np.int16:
if np.allclose(
- luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+ luci_output_data, luci_output_data_ref, rtol=rtolint,
+ atol=atolint) == False:
+ print("luci_output_data_ref", luci_output_data_ref)
+ print("luci_output_data", luci_output_data)
raise SystemExit("Execution result of " + circle_model_ref +
" does not match with " + circle_model)
elif output_dtype == np.bool_:
#addeval(TopKV2_001)
addeval(Transpose_000)
addeval(TransposeConv_000)
+addeval(UnidirectionalSequenceLSTM_002)
+addeval(UnidirectionalSequenceLSTM_003)
+addeval(UnidirectionalSequenceLSTM_004)
addeval(Unpack_000)
addeval(Unpack_001)
addeval(Unpack_002)
addeval(Unpack_003)
+addeval(UnidirectionalSequenceLSTM_002)
#addeval(Where_000)
#addeval(Where_001)
#addeval(While_000)
#addeval(While_001)
#addeval(While_002)
#addeval(While_003)
-addeval(YUV_TO_RGB_U8_000)
#addeval(ZerosLike_000)
# Simple Network test
# Tests with tolerance
addevaltol(SVDF_000 8e-3 8e-3)
addevaltol(SVDF_001 8e-3 8e-3)
+# Refer to https://github.com/Samsung/ONE/issues/10438
+addevaltol(YUV_TO_RGB_U8_000 1 1)
{
// If there is no tensor, insert CircleOutputExclude.
auto *node = context->graph()->nodes()->create<luci::CircleOutputExclude>();
- // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
- // a dummy type is inserted.
- node->dtype(loco::DataType::FLOAT32);
input_nodes.push_back(node);
}
}
{
// If there is no tensor, insert CircleOutputExclude.
auto *node = context->graph()->nodes()->create<luci::CircleOutputExclude>();
- // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
- // a dummy type is inserted.
- node->dtype(loco::DataType::FLOAT32);
input_nodes.push_back(node);
}
}
auto *node = bna.context->graph()->nodes()->create<CircleBidirectionalSequenceLSTM>();
auto &inputs = bna.input_nodes;
node->input(inputs.at(0));
+
node->fw_input_to_input_weights(inputs.at(1)); // Optional
node->fw_input_to_cell_weights(inputs.at(2));
node->fw_input_to_forget_weights(inputs.at(3));
node->fw_input_to_output_weights(inputs.at(4));
+
node->fw_recurrent_to_input_weights(inputs.at(5)); // Optional
node->fw_recurrent_to_cell_weights(inputs.at(6));
node->fw_recurrent_to_forget_weights(inputs.at(7));
node->fw_recurrent_to_output_weights(inputs.at(8));
+
node->fw_cell_to_input_weights(inputs.at(9)); // Optional
node->fw_cell_to_forget_weights(inputs.at(10)); // Optional
node->fw_cell_to_output_weights(inputs.at(11)); // Optional
- node->fw_input_gate_bias(inputs.at(12)); // Optional
+
+ node->fw_input_gate_bias(inputs.at(12)); // Optional
node->fw_forget_gate_bias(inputs.at(13));
node->fw_cell_gate_bias(inputs.at(14));
node->fw_output_gate_bias(inputs.at(15));
- node->fw_projection_weights(inputs.at(16)); // Optional
- node->fw_projection_bias(inputs.at(17)); // Optional
+
+ node->fw_projection_weights(inputs.at(16)); // Optional
+ node->fw_projection_bias(inputs.at(17)); // Optional
+
node->bw_input_to_input_weights(inputs.at(18)); // Optional
node->bw_input_to_cell_weights(inputs.at(19));
node->bw_input_to_forget_weights(inputs.at(20));
node->bw_input_to_output_weights(inputs.at(21));
+
node->bw_recurrent_to_input_weights(inputs.at(22)); // Optional
node->bw_recurrent_to_cell_weights(inputs.at(23));
node->bw_recurrent_to_forget_weights(inputs.at(24));
node->bw_recurrent_to_output_weights(inputs.at(25));
+
node->bw_cell_to_input_weights(inputs.at(26)); // Optional
node->bw_cell_to_forget_weights(inputs.at(27)); // Optional
node->bw_cell_to_output_weights(inputs.at(28)); // Optional
- node->bw_input_gate_bias(inputs.at(29)); // Optional
+
+ node->bw_input_gate_bias(inputs.at(29)); // Optional
node->bw_forget_gate_bias(inputs.at(30));
node->bw_cell_gate_bias(inputs.at(31));
node->bw_output_gate_bias(inputs.at(32));
+
node->bw_projection_weights(inputs.at(33)); // Optional
node->bw_projection_bias(inputs.at(34)); // Optional
+
node->fw_activation_state(inputs.at(35));
node->fw_cell_state(inputs.at(36));
node->bw_activation_state(inputs.at(37));
node->fw_auxillary_input_to_forget_weights(inputs.at(41)); // Optional
node->fw_auxillary_input_to_cell_weights(inputs.at(42)); // Optional
node->fw_auxillary_input_to_output_weights(inputs.at(43)); // Optional
+
node->bw_auxillary_input_to_input_weights(inputs.at(44)); // Optional
node->bw_auxillary_input_to_forget_weights(inputs.at(45)); // Optional
node->bw_auxillary_input_to_cell_weights(inputs.at(46)); // Optional
if (inputs.size() == 4)
{
auto *bias = graph->nodes()->create<CircleOutputExclude>();
- // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
- // a dummy type is inserted.
- bias->dtype(inputs.at(0)->dtype());
node->bias(bias);
node->input_activation_state(inputs.at(3));
if (inputs.size() == 3)
{
auto *bias = graph->nodes()->create<CircleOutputExclude>();
- // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
- // a dummy type is inserted.
- bias->dtype(loco::DataType::FLOAT32);
node->bias(bias);
}
else
auto *node = graph->nodes()->create<CircleUnidirectionalSequenceLSTM>();
node->input(inputs.at(0));
node->input_to_input_weights(inputs.at(1)); // Optional
- node->input_to_cell_weights(inputs.at(2));
- node->input_to_forget_weights(inputs.at(3));
+ node->input_to_forget_weights(inputs.at(2));
+ node->input_to_cell_weights(inputs.at(3));
node->input_to_output_weights(inputs.at(4));
+
node->recurrent_to_input_weights(inputs.at(5)); // Optional
- node->recurrent_to_cell_weights(inputs.at(6));
- node->recurrent_to_forget_weights(inputs.at(7));
+ node->recurrent_to_forget_weights(inputs.at(6));
+ node->recurrent_to_cell_weights(inputs.at(7));
node->recurrent_to_output_weights(inputs.at(8));
+
node->cell_to_input_weights(inputs.at(9)); // Optional
node->cell_to_forget_weights(inputs.at(10)); // Optional
node->cell_to_output_weights(inputs.at(11)); // Optional
- node->input_gate_bias(inputs.at(12)); // Optional
+
+ node->input_gate_bias(inputs.at(12)); // Optional
node->forget_gate_bias(inputs.at(13));
node->cell_gate_bias(inputs.at(14));
node->output_gate_bias(inputs.at(15));
+
node->projection_weights(inputs.at(16)); // Optional
node->projection_bias(inputs.at(17)); // Optional
- node->activation_state(inputs.at(18));
+
+ node->output_state(inputs.at(18));
node->cell_state(inputs.at(19));
+
node->input_layer_norm_coefficients(inputs.at(20)); // Optional
node->forget_layer_norm_coefficients(inputs.at(21)); // Optional
node->cell_layer_norm_coefficients(inputs.at(22)); // Optional
loco::Node *projection_bias(void) const { return at(17)->node(); }
void projection_bias(loco::Node *node) { at(17)->node(node); }
- loco::Node *activation_state(void) const { return at(18)->node(); }
- void activation_state(loco::Node *node) { at(18)->node(node); }
+ // Getter/setter for the node attached at argument slot 18.
+ // These replace the activation_state accessors removed just above; the slot index is unchanged.
+ loco::Node *output_state(void) const { return at(18)->node(); }
+ void output_state(loco::Node *node) { at(18)->node(node); }
loco::Node *cell_state(void) const { return at(19)->node(); }
void cell_state(loco::Node *node) { at(19)->node(node); }
ASSERT_EQ(nullptr, trc_node.projection_weights());
ASSERT_EQ(nullptr, trc_node.projection_bias());
- ASSERT_EQ(nullptr, trc_node.activation_state());
+ ASSERT_EQ(nullptr, trc_node.output_state());
ASSERT_EQ(nullptr, trc_node.cell_state());
ASSERT_EQ(nullptr, trc_node.input_layer_norm_coefficients());
void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) const
{
- // TODO remove deprecated codes
-#if 0
- setting.filter(hermes::SeverityCategory::FATAL).reject_all();
- setting.filter(hermes::SeverityCategory::ERROR).reject_all();
- setting.filter(hermes::SeverityCategory::WARN).reject_all();
- setting.filter(hermes::SeverityCategory::INFO).reject_all();
- setting.filter(hermes::SeverityCategory::VERBOSE).reject_all();
-
- // TODO enable FATAL and ERROR
- if (_show_warn)
- {
- setting.filter(hermes::SeverityCategory::WARN).accept_all();
- }
- if (_show_info)
- {
- setting.filter(hermes::SeverityCategory::INFO).accept_all();
- }
- if (_show_verbose)
- {
- setting.filter(hermes::SeverityCategory::VERBOSE).accept_upto(_show_verbose);
- }
-#endif
setting.reject_all();
setting.filter(hermes::SeverityCategory::FATAL).accept_upto(_show_verbose);
setting.filter(hermes::SeverityCategory::ERROR).accept_upto(_show_verbose);
}
}
+// Formats a float through std::to_string.
+std::string to_str(float value) { return std::to_string(value); }
+
+// Formats a 32-bit signed integer through std::to_string.
+std::string to_str(int32_t value) { return std::to_string(value); }
+
+// Formats a bool as the literal text "true" or "false".
std::string to_str(bool value) { return value ? "true" : "false"; }
std::string to_str(luci::FusedActFunc fused)
"output_gate_bias",
"projection_weights",
"projection_bias",
- "activation_state",
+ "output_state",
"cell_state",
"input_layer_norm_coefficients",
"forget_layer_norm_coefficients",
loco::must_cast<luci::CircleNode *>(node->projection_weights());
luci::CircleNode *projection_bias = loco::must_cast<luci::CircleNode *>(node->projection_bias());
- luci::CircleNode *activation_state =
- loco::must_cast<luci::CircleNode *>(node->activation_state());
+ luci::CircleNode *output_state = loco::must_cast<luci::CircleNode *>(node->output_state());
luci::CircleNode *cell_state = loco::must_cast<luci::CircleNode *>(node->cell_state());
luci::CircleNode *input_layer_norm_coefficients =
cloned->projection_weights(cn->find_clone(projection_weights));
cloned->projection_bias(cn->find_clone(projection_bias));
- cloned->activation_state(cn->find_clone(activation_state));
+ cloned->output_state(cn->find_clone(output_state));
cloned->cell_state(cn->find_clone(cell_state));
cloned->input_layer_norm_coefficients(cn->find_clone(input_layer_norm_coefficients));
node()->projection_weights(input(16));
node()->projection_bias(input(17));
- node()->activation_state(input(18));
+ node()->output_state(input(18));
node()->cell_state(input(19));
node()->input_layer_norm_coefficients(input(20));
#include <loco.h>
+#include <cassert>
+
namespace
{
{
auto graph = loco::make_graph();
auto graph_clone = graph.get();
+ auto &graph_name = graph_org->name();
- graph_clone->name(graph_org->name());
+ graph_clone->name(graph_name);
// clone inputs
- for (uint32_t n = 0; n < graph_org->inputs()->size(); ++n)
+ auto inputs = graph_org->inputs();
+ assert(inputs);
+ for (uint32_t n = 0; n < inputs->size(); ++n)
{
auto input_org = luci::input_node(graph_org, n);
assert(input_org != nullptr);
target_link_libraries(luci_pass PRIVATE luci_service)
target_link_libraries(luci_pass PRIVATE luci_logex)
target_link_libraries(luci_pass PRIVATE luci_profile)
-target_link_libraries(luci_pass PRIVATE mio_tflite280_inc)
+target_link_libraries(luci_pass PRIVATE luci_compute)
target_link_libraries(luci_pass PRIVATE nncc_common)
target_link_libraries(luci_pass PRIVATE pepper_csv2vec)
target_link_libraries(luci_pass PRIVATE oops)
FoldCast,
FoldDensify,
FoldDepthwiseConv2D,
+ FoldFullyConnected,
FoldDequantize,
FoldGather,
FoldSparseToDense,
ForwardReshapeToUnaryOp,
+ ForwardTransposeOp,
SparsifyTensorPass,
FusePreActivationBatchNorm,
MakeBatchNormGammaPositive,
FuseActivationFunction,
+ FusePRelu,
ShuffleWeightTo16x1Float32,
RemoveRedundantTranspose,
ReplaceMulAddWithDepthwiseConv,
RemoveRedundantReshape,
RemoveFakeQuant,
RemoveQuantDequantSeq,
+ RemoveDuplicateConst,
+ UnrollUnidirSeqLSTM,
};
enum AlgorithmParameters
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_FULLY_CONNECTED_PASS_H__
+#define __LUCI_FOLD_FULLY_CONNECTED_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold FullyConnected with constant input and filter into a
+ * constant tensor
+ */
+struct FoldFullyConnectedPass final : public logo::Pass
+{
+ // Identifier string reported for this pass.
+ const char *name(void) const final { return "luci::FoldFullyConnectedPass"; }
+
+ // Entry point of the pass for graph g; the definition lives outside this header.
+ // NOTE(review): the bool presumably reports whether the graph was changed - confirm against logo::Pass.
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_FULLY_CONNECTED_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FORWARD_TRANSPOSE_OP_PASS_H__
+#define __LUCI_FORWARD_TRANSPOSE_OP_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to Forward Transpose Ops for further optimization.
+ */
+struct ForwardTransposeOpPass final : public logo::Pass
+{
+ // Identifier string reported for this pass.
+ const char *name(void) const final { return "luci::ForwardTransposeOpPass"; }
+
+ // Entry point of the pass for graph g; the definition lives outside this header.
+ // NOTE(review): the bool presumably reports whether the graph was changed - confirm against logo::Pass.
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FORWARD_TRANSPOSE_OP_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_PRELU_PASS_H__
+#define __LUCI_FUSE_PRELU_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse certain pattern of subgraph into CirclePRelu
+ * with auxiliary nodes
+ *
+ * For detailed subgraph pattern to be fused, please check its implementation.
+ */
+struct FusePReluPass final : public logo::Pass
+{
+ // Identifier string reported for this pass.
+ const char *name(void) const final { return "luci::FusePReluPass"; }
+
+ // Entry point of the pass for graph g; the definition lives outside this header.
+ // NOTE(review): the bool presumably reports whether the graph was changed - confirm against logo::Pass.
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_PRELU_PASS_H__
loco::DataType input_model_dtype = loco::DataType::Unknown;
loco::DataType output_model_dtype = loco::DataType::Unknown;
QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
- loco::DataType input_type = loco::DataType::Unknown;
- loco::DataType output_type = loco::DataType::Unknown;
+ std::vector<loco::DataType> input_types;
+ std::vector<loco::DataType> output_types;
bool TF_style_maxpool = false;
std::vector<LayerInfo> layers_info;
};
- // For backward-compatibility
- // TODO Remove this constructor
-public:
- QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
- QuantizationGranularity granularity)
- {
- _ctx = std::make_unique<Context>();
- {
- _ctx->input_model_dtype = input_model_dtype;
- _ctx->output_model_dtype = output_model_dtype;
- _ctx->granularity = granularity;
- _ctx->input_type = output_model_dtype;
- _ctx->output_type = output_model_dtype;
- _ctx->TF_style_maxpool = false;
- }
- }
-
public:
QuantizeWithMinMaxPass(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
{
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_DUPLICATE_CONST_PASS_H__
+#define __LUCI_REMOVE_DUPLICATE_CONST_PASS_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <logo/Pass.h>
+
+// std::map and std::vector are used by the _sum_to_const member below; include them
+// directly so this header does not depend on transitive includes.
+#include <map>
+#include <vector>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove duplicate Const nodes.
+ */
+struct RemoveDuplicateConstPass final : public logo::Pass
+{
+ // Identifier string reported for this pass.
+ const char *name(void) const final { return "luci::RemoveDuplicateConstPass"; }
+
+ // Entry point of the pass for graph g; the definition lives outside this header.
+ // NOTE(review): the bool presumably reports whether the graph was changed - confirm against logo::Pass.
+ bool run(loco::Graph *g) final;
+
+private:
+ // Private helper defined outside this header.
+ // NOTE(review): presumably performs the removal over _sum_to_const and reports a change - confirm in the .cpp.
+ bool remove_duplicate_const();
+
+ // Private helper, templated on the constant's data type DT, defined outside this header.
+ // NOTE(review): presumably records const_node in _sum_to_const - confirm in the .cpp.
+ template <loco::DataType DT> void add_to_map(luci::CircleConst *const_node);
+
+ // Constant nodes grouped under a float key.
+ // NOTE(review): the name suggests the key is a sum over the constant's elements - confirm in the .cpp.
+ std::map<float, std::vector<CircleConst *>> _sum_to_const;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_DUPLICATE_CONST_PASS_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_UNROLL_UNIDIRECTIONALSEQUENCELSTM_PASS_H__
+#define __LUCI_UNROLL_UNIDIRECTIONALSEQUENCELSTM_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to Unroll UnidirectionalSequenceLSTM
+ */
+struct UnrollUnidirectionalSequenceLSTMPass final : public logo::Pass
+{
+ // Identifier string reported for this pass.
+ const char *name(void) const final { return "luci::UnrollUnidirectionalSequenceLSTMPass"; }
+
+ // Entry point of the pass for graph g; the definition lives outside this header.
+ // NOTE(review): the bool presumably reports whether the graph was changed - confirm against logo::Pass.
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_UNROLL_UNIDIRECTIONALSEQUENCELSTM_PASS_H__
#include "luci/Pass/FoldDensifyPass.h"
#include "luci/Pass/FoldDepthwiseConv2DPass.h"
#include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/FoldFullyConnectedPass.h"
#include "luci/Pass/FoldGatherPass.h"
#include "luci/Pass/FoldSparseToDensePass.h"
#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
+#include "luci/Pass/ForwardTransposeOpPass.h"
#include "luci/Pass/FuseActivationFunctionPass.h"
#include "luci/Pass/FuseAddWithFullyConnectedPass.h"
#include "luci/Pass/FuseAddWithTConvPass.h"
#include "luci/Pass/FuseInstanceNormPass.h"
#include "luci/Pass/FuseMeanWithMeanPass.h"
#include "luci/Pass/FusePreActivationBatchNormPass.h"
+#include "luci/Pass/FusePReluPass.h"
#include "luci/Pass/FuseTransposeWithMeanPass.h"
#include "luci/Pass/MakeBatchNormGammaPositivePass.h"
+#include "luci/Pass/RemoveDuplicateConstPass.h"
#include "luci/Pass/RemoveFakeQuantPass.h"
#include "luci/Pass/RemoveQuantDequantSeqPass.h"
#include "luci/Pass/RemoveRedundantReshapePass.h"
#include "luci/Pass/SubstituteTransposeToReshapePass.h"
#include "luci/Pass/TransformMinMaxToRelu6Pass.h"
#include "luci/Pass/TransformMinReluToRelu6Pass.h"
+#include "luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h"
// TODO add more passes
#include "luci/Pass/CircleShapeInferencePass.h"
{
phase.emplace_back(std::make_unique<FuseActivationFunctionPass>());
}
+ if (_options->query(Options::Algorithm::FusePRelu))
+ {
+ phase.emplace_back(std::make_unique<FusePReluPass>());
+ }
if (_options->query(Options::Algorithm::FuseTransposeWithMean))
{
phase.emplace_back(std::make_unique<FuseTransposeWithMeanPass>());
{
phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
}
+ if (_options->query(Options::Algorithm::FoldFullyConnected))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldFullyConnectedPass>());
+ }
if (_options->query(Options::Algorithm::FoldGather))
{
phase.emplace_back(std::make_unique<luci::FoldGatherPass>());
{
phase.emplace_back(std::make_unique<luci::ForwardReshapeToUnaryOpPass>());
}
+ if (_options->query(Options::Algorithm::ForwardTransposeOp))
+ {
+ phase.emplace_back(std::make_unique<luci::ForwardTransposeOpPass>());
+ }
if (_options->query(Options::Algorithm::FusePreActivationBatchNorm))
{
phase.emplace_back(std::make_unique<luci::FusePreActivationBatchNormPass>());
{
phase.emplace_back(std::make_unique<luci::ExpandBroadcastConstPass>());
}
+ if (_options->query(Options::Algorithm::RemoveDuplicateConst))
+ {
+ phase.emplace_back(std::make_unique<luci::RemoveDuplicateConstPass>());
+ }
if (_options->query(Options::Algorithm::RemoveFakeQuant))
{
phase.emplace_back(std::make_unique<luci::RemoveFakeQuantPass>());
{
phase.emplace_back(std::make_unique<luci::TransformMinReluToRelu6Pass>());
}
+ if (_options->query(Options::Algorithm::UnrollUnidirSeqLSTM))
+ {
+ phase.emplace_back(std::make_unique<luci::UnrollUnidirectionalSequenceLSTMPass>());
+ }
/* TRANSFORM DECLARATION END */
#include <luci/IR/CircleNode.h>
#include <logo/Phase.h>
+#include <pepper/csv2vec.h>
#include <memory>
using namespace luci;
using LayerParam = luci::CircleQuantizer::Options::LayerParam;
+// Normalizes the user-given input_type list against the input signature of graph g.
+//
+// g          : graph whose inputs are inspected; nothing is done when it is nullptr
+// input_type : in/out list of dtypes. When it holds exactly one entry, it is rebuilt with
+//              one entry per graph input: that entry for float32 inputs, and the input's
+//              own dtype for every other input
+//
+// Throws std::runtime_error when the list size is neither 1 nor the number of graph inputs,
+// or when any entry violates condition C1 or C2 checked at the end of this function.
+void canonicalize_input_type(loco::Graph *g, std::vector<loco::DataType> &input_type)
+{
+ if (g == nullptr)
+ return;
+
+ const auto inputs = g->inputs();
+
+ assert(inputs); // FIX_CALLER_UNLESS
+
+ // Check validity of the number of input dtype given by a user
+ if (input_type.size() != 1 and input_type.size() != inputs->size())
+ {
+ throw std::runtime_error(
+ "Invalid number of input dtype. The number of input dtype should be 1 or "
+ "the same as the number of graph inputs.");
+ }
+
+ // Handle the case when a user gives only one input dtype
+ if (input_type.size() == 1)
+ {
+ // Copy the single entry out first because the list is cleared and refilled below
+ const auto user_given_dtype = input_type[0];
+ input_type.clear();
+
+ // Expand input dtype to the number of graph inputs
+ // Since quantizer can only quantize float32, user_given_dtype is set only for float32 inputs
+ auto input_nodes = loco::input_nodes(g);
+ for (uint32_t i = 0; i < input_nodes.size(); i++)
+ {
+ auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
+
+ if (input->dtype() == loco::DataType::FLOAT32)
+ input_type.push_back(user_given_dtype);
+ else
+ input_type.push_back(input->dtype());
+ }
+ }
+
+ // Finally, check validity of input_type
+ // input_type is valid if
+ // C1. for non-float32 model input, input_type == model's input dtype
+ // or
+ // C2. for float32 model input, input_type == uint8, int16, or float32
+ auto input_nodes = loco::input_nodes(g);
+ for (uint32_t i = 0; i < input_nodes.size(); i++)
+ {
+ auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
+ // input_type[i] below is looked up by loop position, which is asserted to equal
+ // the input's own index
+ assert(i == input->index()); // FIX_ME_UNLESS
+
+ if (input->dtype() != loco::DataType::FLOAT32)
+ {
+ // C1
+ if (input->dtype() != input_type[i])
+ throw std::runtime_error(
+ "Input dtype of " + input->name() +
+ " is invalid. It has to be the same with the model's input dtype.");
+ }
+ else
+ {
+ // C2
+ if (input_type[i] != loco::DataType::FLOAT32 and input_type[i] != loco::DataType::U8 and
+ input_type[i] != loco::DataType::S16)
+ {
+ throw std::runtime_error("Input dtype of " + input->name() +
+ " is invalid. For float32 input, the input dtype after "
+ "quantization must be one of uint8, int16, or float32.");
+ }
+ }
+ }
+}
+
+// Normalizes the user-given output_type list against the output signature of graph g.
+//
+// g           : graph whose outputs are inspected; nothing is done when it is nullptr
+// output_type : in/out list of dtypes. When it holds exactly one entry, it is rebuilt with
+//               one entry per graph output: that entry for float32 outputs, and the output's
+//               own dtype for every other output
+//
+// Throws std::runtime_error when the list size is neither 1 nor the number of graph outputs,
+// or when any entry violates condition C1 or C2 checked at the end of this function.
+//
+// NOTE This function is almost the same as canonicalize_input_type, but it is written as a
+// separate function for more precise error messaging.
+// TODO Find a way to reduce duplicate code
+void canonicalize_output_type(loco::Graph *g, std::vector<loco::DataType> &output_type)
+{
+ if (g == nullptr)
+ return;
+
+ const auto outputs = g->outputs();
+
+ assert(outputs); // FIX_CALLER_UNLESS
+
+ // Check validity of the number of output dtype given by a user
+ if (output_type.size() != 1 and output_type.size() != outputs->size())
+ {
+ throw std::runtime_error(
+ "Invalid number of output dtype. The number of output dtype should be 1 or "
+ "the same as the number of graph outputs.");
+ }
+
+ // Handle the case when a user gives only one output dtype
+ if (output_type.size() == 1)
+ {
+ // Copy the single entry out first because the list is cleared and refilled below
+ const auto user_given_dtype = output_type[0];
+ output_type.clear();
+
+ // Expand output dtype to the number of graph outputs
+ // If dtype of graph output is float32, it will be replaced with user_given_dtype
+ // Otherwise, it will not change
+ auto output_nodes = loco::output_nodes(g);
+ for (uint32_t i = 0; i < output_nodes.size(); i++)
+ {
+ auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
+
+ if (output->dtype() == loco::DataType::FLOAT32)
+ output_type.push_back(user_given_dtype);
+ else
+ output_type.push_back(output->dtype());
+ }
+ }
+
+ // Finally, check validity of output_type
+ // output_type is valid if
+ // C1. for non-float32 model output, output_type == model's output dtype
+ // or
+ // C2. for float32 model output, output_type == uint8, int16, or float32
+ auto output_nodes = loco::output_nodes(g);
+ for (uint32_t i = 0; i < output_nodes.size(); i++)
+ {
+ auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
+ // output_type[i] below is looked up by loop position, which is asserted to equal
+ // the output's own index
+ assert(i == output->index()); // FIX_ME_UNLESS
+
+ if (output->dtype() != loco::DataType::FLOAT32)
+ {
+ // C1
+ if (output->dtype() != output_type[i])
+ throw std::runtime_error(
+ "Output dtype of " + output->name() +
+ " is invalid. It has to be the same with the model's output dtype.");
+ }
+ else
+ {
+ // C2
+ if (output_type[i] != loco::DataType::FLOAT32 and output_type[i] != loco::DataType::U8 and
+ output_type[i] != loco::DataType::S16)
+ {
+ throw std::runtime_error("Output dtype of " + output->name() +
+ " is invalid. For float32 output, the output dtype after "
+ "quantization must be one of uint8, int16, or float32.");
+ }
+ }
+ }
+}
+
template <typename T> T lexical_cast(const std::string &str)
{
std::istringstream ss;
static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
- static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16", "float32"};
- static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16", "float32"};
+ static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16", "int32",
+ "int64", "float32", "bool"};
+ static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16", "int32",
+ "int64", "float32", "bool"};
auto input_model_dtype =
_options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
if (output_type.empty())
output_type = output_model_dtype;
+ auto input_type_vec = pepper::csv_to_vector<std::string>(input_type);
+ auto output_type_vec = pepper::csv_to_vector<std::string>(output_type);
+
bool TF_style_maxpool =
_options->param(Options::AlgorithmParameters::Quantize_TF_style_maxpool) == "True";
throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
to_string(qwmm_supported_granularity));
- if (!in_array(to_lower_case(input_type), qwmm_supported_input_type))
- throw std::runtime_error("Unsupported input type. List of supported input types: " +
- to_string(qwmm_supported_input_type));
+ for (auto dtype : input_type_vec)
+ {
+ if (!in_array(to_lower_case(dtype), qwmm_supported_input_type))
+ throw std::runtime_error("Unsupported input type. List of supported input types: " +
+ to_string(qwmm_supported_input_type));
+ }
- if (!in_array(to_lower_case(output_type), qwmm_supported_output_type))
- throw std::runtime_error("Unsupported output type. List of supported output types: " +
- to_string(qwmm_supported_output_type));
+ for (auto dtype : output_type_vec)
+ {
+ if (!in_array(to_lower_case(dtype), qwmm_supported_output_type))
+ throw std::runtime_error("Unsupported output type. List of supported output types: " +
+ to_string(qwmm_supported_output_type));
+ }
if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
str_to_dtype(output_model_dtype) != loco::DataType::U8)
}
}
+ auto input_types = str_vec_to_dtype_vec(input_type_vec);
+ auto output_types = str_vec_to_dtype_vec(output_type_vec);
+
+ // Canonicalize user-given input/output_type (match with # of inputs/outputs)
+ canonicalize_input_type(g, input_types);
+ canonicalize_output_type(g, output_types);
+
// Input model checker for quantization
luci::QuantizePreCheckerPass input_model_checker{};
input_model_checker.run(g);
ctx->input_model_dtype = str_to_dtype(input_model_dtype);
ctx->output_model_dtype = str_to_dtype(output_model_dtype);
ctx->granularity = str_to_granularity(granularity);
- ctx->input_type = str_to_dtype(input_type);
- ctx->output_type = str_to_dtype(output_type);
+ ctx->input_types = input_types;
+ ctx->output_types = output_types;
ctx->TF_style_maxpool = TF_style_maxpool;
for (auto layer_param : layer_params)
{
verify_ctx->output_model_dtype = str_to_dtype(output_model_dtype);
verify_ctx->granularity = str_to_granularity(granularity);
- verify_ctx->input_type = str_to_dtype(input_type);
- verify_ctx->output_type = str_to_dtype(output_type);
+ verify_ctx->input_types = input_types;
+ verify_ctx->output_types = output_types;
verify_ctx->TF_style_maxpool = TF_style_maxpool;
for (auto layer_param : layer_params)
return true;
}
-// NOTE Following conditions can be extended later
-// NOTE Used for Maximum, Miminum as ReLU/ReLU6
-//
-// Find T with an NCHW pattern described below
-// - Input (non-constant) shape : [N, C, H, W]
-// - Input (constant) shape : [1] or []
-// - Output shape : [N, C, H, W]
-template <class T>
-bool is_NCHW_with_s_const(const T *node, luci::CircleNode *&pred_node,
- luci::CircleConst *&comp_const)
+// Returns true when node can be dynamic_cast to luci::CircleConst, false otherwise.
+bool is_const(const loco::Node *node)
{
- auto x = dynamic_cast<luci::CircleConst *>(node->x());
- auto y = dynamic_cast<luci::CircleConst *>(node->y());
-
- if (x != nullptr && y == nullptr)
- {
- pred_node = loco::must_cast<luci::CircleNode *>(node->y());
- comp_const = x;
- }
- else if (x == nullptr && y != nullptr)
- {
- pred_node = loco::must_cast<luci::CircleNode *>(node->x());
- comp_const = y;
- }
- else
- {
- // Ignore if T does not have a comp_const input.
+ if (not dynamic_cast<const luci::CircleConst *>(node))
return false;
- }
- if (pred_node->rank() != 4)
+ return true;
+}
+
+// Returns true when node is a luci::CircleConst with a scalar-like shape:
+// either rank 0, or rank 1 with a single element. Returns false for any other node.
+bool is_scalar_const(const loco::Node *node)
+{
+ auto const_node = dynamic_cast<const luci::CircleConst *>(node);
+ if (not const_node)
return false;
- // Check if scalar
- const auto const_rank = comp_const->rank();
- if (const_rank == 0 || (const_rank == 1 && comp_const->dim(0).value() == 1))
+ const auto const_rank = const_node->rank();
+ // shape of scalar
+ // 1. rank = 0
+ // 2. rank = 1, dimension = 1
+ if (const_rank == 0)
+ return true;
+
+ if (const_rank == 1 && const_node->dim(0).value() == 1)
return true;
+
return false;
}
bool visit(luci::CircleLogistic *node) { return convert_unary_x<luci::CircleLogistic>(node); }
- bool visit(luci::CircleLogSoftmax *node)
- {
- return convert_unary_logits<luci::CircleLogSoftmax>(node);
- }
-
bool visit(luci::CircleMaximum *node)
{
- luci::CircleNode *pred_node = nullptr;
- luci::CircleConst *comp_constant = nullptr;
-
- if (is_NCHW_with_s_const<luci::CircleMaximum>(node, pred_node, comp_constant))
+ if ((not is_const(node->x())) and is_scalar_const(node->y()))
{
auto pre_trans = create_pre_transpose(node);
- pre_trans->a(pred_node);
+ pre_trans->a(node->x());
node->x(pre_trans);
}
+ else if (is_scalar_const(node->x()) and (not is_const(node->y())))
+ {
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(node->y());
+ node->y(pre_trans);
+ }
+ else if ((not is_const(node->x())) and (not is_const(node->y())))
+ {
+ auto pre_trans_x = create_pre_transpose(node);
+ pre_trans_x->a(node->x());
+ node->x(pre_trans_x);
+
+ auto pre_trans_y = create_pre_transpose(node);
+ pre_trans_y->a(node->y());
+ node->y(pre_trans_y);
+ }
else
{
// TODO support other cases
bool visit(luci::CircleMinimum *node)
{
- luci::CircleNode *pred_node = nullptr;
- luci::CircleConst *comp_constant = nullptr;
-
- if (is_NCHW_with_s_const<luci::CircleMinimum>(node, pred_node, comp_constant))
+ if ((not is_const(node->x())) and is_scalar_const(node->y()))
{
auto pre_trans = create_pre_transpose(node);
- pre_trans->a(pred_node);
+ pre_trans->a(node->x());
node->x(pre_trans);
}
+ else if (is_scalar_const(node->x()) and (not is_const(node->y())))
+ {
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(node->y());
+ node->y(pre_trans);
+ }
else
{
// TODO support other cases
return true;
}
+ // TODO Reduce duplicate codes with CircleReduceMax
+ // Rewrites a ReduceMin whose input is rank 4: a pre-transpose is inserted on the input,
+ // the reduction indices are replaced by the node returned from create_NHWC_rindices(),
+ // and a post-transpose is appended on the output when the code below decides one is needed.
+ // Returns false without modifying the graph when the input is not rank 4, when the
+ // reduction indices are not a CircleConst, or when create_NHWC_rindices() returns nullptr.
+ bool visit(luci::CircleReduceMin *node)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(node->input());
+ if (input->rank() != 4)
+ return false;
+
+ auto rindices = dynamic_cast<luci::CircleConst *>(node->reduction_indices());
+ if (not rindices)
+ return false;
+
+ auto nhwc_rindices = create_NHWC_rindices(rindices);
+ if (not nhwc_rindices)
+ return false;
+
+ // From here on the graph is modified, so every path below returns true
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(input);
+ node->input(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ node->reduction_indices(nhwc_rindices);
+
+ if (node->keep_dims())
+ {
+ // Output keeps all four dimensions, so the generic post-transpose is used
+ auto post_trans = create_post_transpose(node);
+ // replace() must run before post_trans->a(node); otherwise post_trans would be
+ // redirected to consume itself
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ // The below codes handle the cases where node->keep_dims() == false
+ // 1D output never needs a transpose
+ if (node->rank() <= 1)
+ return true;
+
+ // reduced_dims_nhwc[d] is set when dimension d is listed in nhwc_rindices
+ std::vector<bool> reduced_dims_nhwc(4, false);
+ uint32_t num_reduced_indices = nhwc_rindices->size<loco::DataType::S32>();
+
+ for (uint32_t ri = 0; ri < num_reduced_indices; ++ri)
+ {
+ // NOTE(review): assumes every index is in [0, 3] and listed once - presumably
+ // guaranteed by create_NHWC_rindices(); confirm, since an out-of-range value
+ // would index past the vector and a duplicate would skew the case analysis below
+ reduced_dims_nhwc[nhwc_rindices->at<loco::DataType::S32>(ri)] = true;
+ }
+
+ // if channel dimension has been reduced, we don't need a transpose
+ if (reduced_dims_nhwc[3])
+ return true;
+
+ // likewise, if both space dimensions are reduced, no transpose is needed
+ if (reduced_dims_nhwc[1] && reduced_dims_nhwc[2])
+ return true;
+
+ // Permutation applied to the reduced output (rank 3 or rank 2 at this point)
+ std::vector<int32_t> post_trans_ind;
+ // case 1: only N is reduced
+ if (num_reduced_indices == 1 && reduced_dims_nhwc[0])
+ post_trans_ind = {2, 0, 1};
+
+ // case 2: only H or W is reduced
+ if (num_reduced_indices == 1 && (reduced_dims_nhwc[1] || reduced_dims_nhwc[2]))
+ post_trans_ind = {0, 2, 1};
+
+ // case 3: N and either H or W are reduced
+ if (num_reduced_indices == 2)
+ post_trans_ind = {1, 0};
+
+ auto post_trans = create_Nd_transpose(node, post_trans_ind);
+ // Same ordering constraint as above: redirect consumers first, then attach node
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
bool visit(luci::CircleRelu *node) { return convert_unary_features<luci::CircleRelu>(node); }
bool visit(luci::CircleRelu6 *node) { return convert_unary_features<luci::CircleRelu6>(node); }
bool visit(luci::CircleRsqrt *node) { return convert_unary_x<luci::CircleRsqrt>(node); }
- bool visit(luci::CircleSoftmax *node) { return convert_unary_logits<luci::CircleSoftmax>(node); }
-
bool visit(luci::CircleSplitV *node)
{
// Change split dimension
collect_intermediate = [&](loco::Node *n) {
for (auto succ : loco::succs(n))
{
+ // Skip unnecessary traversal
+ if (intermediate.find(succ) != intermediate.end())
+ continue;
+
// Exit condition
if (is_post_transpose(succ) || is_post_reshape(succ))
continue;
set_data_format(node, DataFormat::NCHW);
}
break;
+ // SOFTMAX, LOG_SOFTMAX are not converted, because
+ // tflite/circle assumes the last channel is always axis
case luci::CircleOpcode::ADD:
case luci::CircleOpcode::CONCATENATION:
case luci::CircleOpcode::ELU:
case luci::CircleOpcode::LEAKY_RELU:
case luci::CircleOpcode::LOGISTIC:
- case luci::CircleOpcode::LOG_SOFTMAX:
case luci::CircleOpcode::MAXIMUM:
case luci::CircleOpcode::MEAN:
case luci::CircleOpcode::MINIMUM:
case luci::CircleOpcode::PAD:
case luci::CircleOpcode::PADV2:
case luci::CircleOpcode::REDUCE_MAX:
+ case luci::CircleOpcode::REDUCE_MIN:
case luci::CircleOpcode::RELU:
case luci::CircleOpcode::RELU6:
case luci::CircleOpcode::RSQRT:
- case luci::CircleOpcode::SOFTMAX:
case luci::CircleOpcode::SPLIT_V:
case luci::CircleOpcode::SQUARED_DIFFERENCE:
case luci::CircleOpcode::SUB:
{
// TODO replace the check above with the input rank check, and remove the condition below
if (not dynamic_cast<luci::CircleMean *>(node) and
- not dynamic_cast<luci::CircleReduceMax *>(node))
+ not dynamic_cast<luci::CircleReduceMax *>(node) and
+ not dynamic_cast<luci::CircleReduceMin *>(node))
continue;
}
luci::CircleLogistic *logistic = nullptr;
};
-class LogSoftmaxGraph final : public SimpleGraph
-{
-protected:
- loco::Node *insertGraphBody(loco::Node *input) override
- {
- log_softmax = g.nodes()->create<luci::CircleLogSoftmax>();
- log_softmax->logits(input);
- log_softmax->name("log_softmax");
-
- return log_softmax;
- }
-
-public:
- luci::CircleLogSoftmax *log_softmax = nullptr;
-};
-
class MaximumGraph final : public SimpleGraph
{
protected:
luci::CircleConst *limit = nullptr;
};
+// Test graph: input -> Maximum(input, input) -> output
+// Both operands of Maximum are the same non-constant node.
+class MaximumNonConstGraph final : public SimpleGraph
+{
+public:
+  // Maximum node under test; assigned by insertGraphBody()
+  luci::CircleMaximum *max = nullptr;
+
+protected:
+  // Creates a FLOAT32 Maximum of shape {1, 16, 4, 4} fed twice by 'input'
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    auto maximum = g.nodes()->create<luci::CircleMaximum>();
+
+    maximum->name("max");
+    maximum->dtype(loco::DataType::FLOAT32);
+    maximum->shape({1, 16, 4, 4});
+
+    maximum->x(input);
+    maximum->y(input);
+
+    max = maximum;
+    return max;
+  }
+};
+
class MeanGraph final : public SimpleGraph
{
protected:
std::initializer_list<uint32_t> _shape = {1, 16, 1, 1};
};
+// Test graph: input -> ReduceMin(reduction_indices = S32 constant) -> output
+//
+// keep_dims(), axes() and shape() must be called before the graph is built, because
+// insertGraphBody() reads the stored values while creating the nodes.
+class ReduceMinGraph final : public SimpleGraph
+{
+protected:
+  // Creates the ReduceMin node and its constant reduction indices
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    rm = g.nodes()->create<luci::CircleReduceMin>();
+    rindices = g.nodes()->create<luci::CircleConst>();
+
+    rm->dtype(loco::DataType::FLOAT32);
+    rindices->dtype(loco::DataType::S32);
+
+    rm->shape(_shape);
+    rindices->shape({static_cast<uint32_t>(_axes.size())});
+
+    // One S32 element per reduced axis
+    rindices->size<loco::DataType::S32>(_axes.size());
+    for (uint32_t i = 0; i < _axes.size(); ++i)
+    {
+      rindices->at<loco::DataType::S32>(i) = _axes[i];
+    }
+
+    rm->input(input);
+    rm->reduction_indices(rindices);
+    rm->keep_dims(_keep_dims);
+
+    // Name the node after the op it actually is (was "reduce_max", copied from ReduceMax)
+    rm->name("reduce_min");
+    rindices->name("rindices");
+
+    return rm;
+  }
+
+public:
+  // Setters for the values consumed by insertGraphBody()
+  void keep_dims(bool val) { _keep_dims = val; }
+  void axes(std::vector<int32_t> val) { _axes = val; }
+  // NOTE(review): only the initializer_list handle is stored, not the elements;
+  // the caller's backing array presumably has to outlive graph construction - confirm
+  void shape(std::initializer_list<uint32_t> val) { _shape = val; }
+
+public:
+  luci::CircleReduceMin *rm = nullptr;
+  luci::CircleConst *rindices = nullptr;
+
+private:
+  bool _keep_dims = true;
+  // Reduction axes written into 'rindices'
+  std::vector<int32_t> _axes = {2, 3};
+  // Shape assigned to the ReduceMin node
+  std::initializer_list<uint32_t> _shape = {1, 16, 1, 1};
+};
+
class ReluGraph final : public SimpleGraph
{
protected:
luci::CircleRsqrt *rsqrt = nullptr;
};
-class SoftmaxGraph final : public SimpleGraph
-{
-protected:
- loco::Node *insertGraphBody(loco::Node *input) override
- {
- softmax = g.nodes()->create<luci::CircleSoftmax>();
- softmax->logits(input);
- softmax->name("softmax");
-
- return softmax;
- }
-
-public:
- luci::CircleSoftmax *softmax = nullptr;
-};
-
class SplitVGraphlet
{
public:
EXPECT_EQ(16, g.logistic->dim(3).value());
}
-TEST(ConvertNCHWToNHWC, LogSoftmax)
+TEST(ConvertNCHWToNHWC, Maximum)
{
- LogSoftmaxGraph g;
+ MaximumGraph g;
g.init();
- run_phase(&g.g, true, true);
+ run_phase(&g.g, false, false);
+
+ auto input_succs = loco::succs(g.input);
+ EXPECT_EQ(1, input_succs.size());
+ check_post_trans(*input_succs.begin());
- check_pre_trans(g.log_softmax->logits());
+ check_pre_trans(g.max->x());
- auto log_softmax_succs = loco::succs(g.log_softmax);
- EXPECT_EQ(1, log_softmax_succs.size());
- check_post_trans(*log_softmax_succs.begin());
+ auto max_succs = loco::succs(g.max);
+ EXPECT_EQ(1, max_succs.size());
+ check_post_trans(*max_succs.begin());
- // Check log_softmax shape
- EXPECT_EQ(1, g.log_softmax->dim(0).value());
- EXPECT_EQ(4, g.log_softmax->dim(1).value());
- EXPECT_EQ(4, g.log_softmax->dim(2).value());
- EXPECT_EQ(16, g.log_softmax->dim(3).value());
+ check_pre_trans(g.output->from());
}
-TEST(ConvertNCHWToNHWC, Maximum)
+TEST(ConvertNCHWToNHWC, Maximum_non_scalar_NEG)
{
MaximumGraph g;
g.init();
- run_phase(&g.g, false, false);
+ g.limit->shape({3});
- auto input_succs = loco::succs(g.input);
- EXPECT_EQ(1, input_succs.size());
- check_post_trans(*input_succs.begin());
+ luci::ConvertNCHWToNHWCPass pass(true, true);
+ EXPECT_FALSE(pass.run(&g.g));
+}
+
+TEST(ConvertNCHWToNHWC, MaximumNonConst)
+{
+ MaximumNonConstGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
check_pre_trans(g.max->x());
+ check_pre_trans(g.max->y());
auto max_succs = loco::succs(g.max);
EXPECT_EQ(1, max_succs.size());
check_post_trans(*max_succs.begin());
-
- check_pre_trans(g.output->from());
}
TEST(ConvertNCHWToNHWC, Mean)
check_pre_trans(g.output->from());
}
+// Negative case: once the constant operand is reshaped to {3} it is no longer
+// scalar-shaped, and the pass is expected to report that nothing changed.
+TEST(ConvertNCHWToNHWC, Minimum_non_scalar_NEG)
+{
+  MinimumGraph g;
+  g.init();
+  g.limit->shape({3});
+
+  luci::ConvertNCHWToNHWCPass pass(true, true);
+  const bool changed = pass.run(&g.g);
+
+  EXPECT_FALSE(changed);
+}
+
TEST(ConvertNCHWToNHWC, Mul)
{
MulGraph g;
}
}
+// keep_dims == true (graph default): ReduceMin must end up between a pre-transpose
+// and a post-transpose, with its reduction indices remapped.
+TEST(ConvertNCHWToNHWC, ReduceMin)
+{
+  ReduceMinGraph g;
+  g.init();
+
+  run_phase(&g.g, true, true);
+
+  // Input side
+  check_pre_trans(g.rm->input());
+
+  // Output side: a single successor, which has to be the post-transpose
+  auto successors = loco::succs(g.rm);
+  EXPECT_EQ(1, successors.size());
+  check_post_trans(*successors.begin());
+
+  // The graph's default axes {2, 3} are expected to be rewritten to {1, 2}
+  auto converted = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+  EXPECT_NE(nullptr, converted);
+  EXPECT_EQ(1, converted->rank());
+  EXPECT_EQ(2, converted->dim(0).value());
+  EXPECT_EQ(2, converted->size<loco::DataType::S32>());
+  EXPECT_EQ(1, converted->at<loco::DataType::S32>(0));
+  EXPECT_EQ(2, converted->at<loco::DataType::S32>(1));
+}
+
+// Table-driven check of ReduceMin with keep_dims == false: for each set of reduced axes
+// the test verifies the rewritten reduction indices and whether a Transpose follows
+// the ReduceMin node.
+TEST(ConvertNCHWToNHWC, ReduceMin_keep_dims_false)
+{
+ // One row of the table
+ struct TC
+ {
+ // reduction axes given to the graph before the pass runs
+ std::vector<int32_t> nchw_ind;
+ // reduction axes expected on the node after the pass
+ std::vector<int32_t> nhwc_ind;
+ // shape assigned to the ReduceMin node (the dimensions left after reduction)
+ // NOTE(review): an initializer_list stores no elements of its own; the arrays
+ // created for the braced lists below presumably stop existing once test_cases
+ // is constructed, leaving this member dangling in the loop - confirm
+ std::initializer_list<uint32_t> shape;
+ // true when a CircleTranspose is expected right after ReduceMin
+ bool needs_transpose = false;
+ };
+
+ uint32_t n = 1;
+ uint32_t c = 16;
+ uint32_t h = 4;
+ uint32_t w = 4;
+
+ std::vector<TC> test_cases{{{0}, {0}, {c, h, w}, true}, {{1}, {3}, {n, h, w}, false},
+ {{2}, {1}, {n, c, w}, true}, {{3}, {2}, {n, c, h}, true},
+ {{0, 1}, {0, 3}, {h, w}, false}, {{0, 2}, {0, 1}, {c, w}, true},
+ {{0, 3}, {0, 2}, {c, h}, true}, {{1, 2}, {3, 1}, {n, w}, false},
+ {{1, 3}, {3, 2}, {n, h}, false}, {{2, 3}, {1, 2}, {n, c}, false},
+ {{0, 1, 2}, {0, 3, 1}, {w}, false}};
+
+ for (auto &tc : test_cases)
+ {
+ // A fresh graph per row; the setters must run before init()
+ ReduceMinGraph g;
+ g.keep_dims(false);
+ g.axes(tc.nchw_ind);
+ g.shape(tc.shape);
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.rm->input());
+
+ auto rm_succs = loco::succs(g.rm);
+ EXPECT_EQ(1, rm_succs.size());
+ if (tc.needs_transpose)
+ {
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleTranspose *>(*rm_succs.begin()));
+ }
+ else
+ {
+ // No transpose: ReduceMin feeds the graph output directly
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleOutput *>(*rm_succs.begin()));
+ }
+
+ // The rewritten indices must match the expected row element by element
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->dim(0).value());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->size<loco::DataType::S32>());
+ for (uint32_t i = 0; i < tc.nhwc_ind.size(); ++i)
+ {
+ EXPECT_EQ(tc.nhwc_ind[i], new_rindices->at<loco::DataType::S32>(i));
+ }
+ }
+}
+
TEST(ConvertNCHWToNHWC, Relu)
{
ReluGraph g;
EXPECT_EQ(16, g.rsqrt->dim(3).value());
}
-TEST(ConvertNCHWToNHWC, Softmax)
-{
- SoftmaxGraph g;
- g.init();
-
- run_phase(&g.g, true, true);
-
- check_pre_trans(g.softmax->logits());
-
- auto softmax_succs = loco::succs(g.softmax);
- EXPECT_EQ(1, softmax_succs.size());
- check_post_trans(*softmax_succs.begin());
-
- // Check softmax shape
- EXPECT_EQ(1, g.softmax->dim(0).value());
- EXPECT_EQ(4, g.softmax->dim(1).value());
- EXPECT_EQ(4, g.softmax->dim(2).value());
- EXPECT_EQ(16, g.softmax->dim(3).value());
-}
-
TEST(ConvertNCHWToNHWC, SplitV)
{
SplitVGraph g;
luci::CircleQuantize *create_quantize(luci::CircleNode *node)
{
auto quantize = node->graph()->nodes()->create<luci::CircleQuantize>();
- quantize->name(node->name() + "_Quantize");
+ // DESIGN NOTE: Why use '_FQ_Quantize' instead of '_Quantize'?
+ // '_Quantize' is used in mixed-precision quantization
+ // We add '_FQ' to distinguish Op from mixed-precision quantization
+ quantize->name(node->name() + "_FQ_Quantize");
quantize->dtype(node->dtype());
quantize->rank(node->rank());
for (uint32_t i = 0; i < node->rank(); i++)
luci::CircleDequantize *create_dequantize(luci::CircleNode *node)
{
auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>();
- dequantize->name(node->name() + "_Dequantize");
+ // DESIGN NOTE: Why use '_FQ_Dequantize' instead of '_Dequantize'?
+ // '_Dequantize' is used in mixed-precision quantization
+ // We add '_FQ' to distinguish Op from mixed-precision quantization
+ dequantize->name(node->name() + "_FQ_Dequantize");
dequantize->dtype(loco::DataType::FLOAT32);
dequantize->rank(node->rank());
for (uint32_t i = 0; i < node->rank(); i++)
// For non-const activation, insert Quantize-Dequantize Ops
// and dequantize the node
+ void visit(luci::CircleAbs *node) { fq_activation(node); }
void visit(luci::CircleAdd *node) { fq_activation(node); }
void visit(luci::CircleAveragePool2D *node) { fq_activation(node); }
void visit(luci::CircleBatchMatMul *node) { fq_activation(node); }
void visit(luci::CirclePad *node) { fq_activation(node); }
void visit(luci::CirclePRelu *node) { fq_activation(node); }
void visit(luci::CircleMean *node) { fq_activation(node); }
+ void visit(luci::CircleReduceProd *node) { fq_activation(node); }
void visit(luci::CircleReduceMax *node) { fq_activation(node); }
void visit(luci::CircleRelu *node) { fq_activation(node); }
void visit(luci::CircleRelu6 *node) { fq_activation(node); }
// (dtype will be automatically updated by type inference)
void visit(luci::CircleCast *) {}
void visit(luci::CircleConcatenation *) {}
+ void visit(luci::CircleDepthToSpace *) {}
void visit(luci::CircleGather *) {}
void visit(luci::CircleSlice *) {}
void visit(luci::CircleStridedSlice *) {}
void visit(luci::CircleReshape *) {}
+ void visit(luci::CircleSpaceToDepth *) {}
void visit(luci::CircleSplit *) {}
void visit(luci::CircleSplitOut *) {}
void visit(luci::CircleSplitV *) {}
void visit(luci::CircleSplitVOut *) {}
void visit(luci::CircleTranspose *) {}
+ void visit(luci::CirclePack *) {}
+ void visit(luci::CircleUnpack *) {}
+ void visit(luci::CircleUnpackOut *) {}
// For Ops that return index, fake quantization is unnecessary
void visit(luci::CircleArgMax *) {}
#include <luci/IR/CircleNodes.h>
+#include <limits> // std::numeric_limits
+
#include <gtest/gtest.h>
namespace
#include <luci/Log.h>
+#include <limits> // std::numeric_limits
+
namespace
{
#include <luci/IR/CircleNodes.h>
+#include <limits> // std::numeric_limits
+
#include <gtest/gtest.h>
namespace
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldFullyConnectedPass.h"
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <luci/Log.h>
+
+#include <limits> // std::numeric_limits
+
+namespace
+{
+
+// Fill 'params' with the float clamp range implied by the fused activation of 'node'
+// and with the default weights format.
+// Returns false, after logging a warning and without touching 'params', when the fused
+// activation is not one of NONE, TANH, RELU, RELU_N1_TO_1 or RELU6.
+bool set_kernel_parameters(tflite::FullyConnectedParams *params, luci::CircleFullyConnected *node)
+{
+  const auto activation = node->fusedActivationFunction();
+
+  // NONE and TANH leave the range unbounded
+  float act_min = std::numeric_limits<float>::lowest();
+  float act_max = std::numeric_limits<float>::max();
+
+  if (activation == luci::FusedActFunc::NONE or activation == luci::FusedActFunc::TANH)
+  {
+    // keep the unbounded range
+  }
+  else if (activation == luci::FusedActFunc::RELU)
+  {
+    act_min = 0;
+  }
+  else if (activation == luci::FusedActFunc::RELU_N1_TO_1)
+  {
+    act_min = -1;
+    act_max = 1;
+  }
+  else if (activation == luci::FusedActFunc::RELU6)
+  {
+    act_min = 0;
+    act_max = 6;
+  }
+  else
+  {
+    LOGGER(l);
+    WARN(l) << "Unsupported activation: " << uint32_t(activation);
+    return false;
+  }
+
+  params->float_activation_min = act_min;
+  params->float_activation_max = act_max;
+
+  assert(node->weights_format() ==
+         luci::CircleFullyConnected::WeightsFormat::DEFAULT); // FIX_CALLER_UNLESS
+  params->weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
+
+  return true;
+}
+
+#define RETURN_FALSE_UNLESS(cond) \
+ if (not(cond)) \
+ return false;
+
+/**
+ * Fold FullyConnected with constant input and filter into a constant tensor
+ *
+ * BEFORE
+ *
+ *    [CircleConst] [CircleConst]
+ *               |   |
+ *       [CircleFullyConnected]
+ *
+ * AFTER
+ *
+ *           [CircleConst]
+ *
+ * Returns true only when 'node' was replaced by a newly created constant.
+ * Returns false, leaving the graph untouched, when 'node' is nullptr or when any
+ * of the checks below fails:
+ *  - input and weights are FLOAT32 constants, weights are rank 2 in DEFAULT format
+ *  - bias is a FLOAT32 constant with one value per unit, or a CircleOutputExclude
+ *  - the fused activation is accepted by set_kernel_parameters()
+ *  - the shape recorded on 'node' agrees with the batch and unit counts
+ */
+bool fold_fully_connected(luci::CircleFullyConnected *node)
+{
+  RETURN_FALSE_UNLESS(node != nullptr);
+
+  LOGGER(l);
+
+  auto const input = dynamic_cast<luci::CircleConst *>(node->input());
+  auto const weights = dynamic_cast<luci::CircleConst *>(node->weights());
+  auto const bias = dynamic_cast<luci::CircleConst *>(node->bias());
+  auto const no_bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias());
+
+  RETURN_FALSE_UNLESS(input != nullptr);
+  RETURN_FALSE_UNLESS(weights != nullptr);
+  RETURN_FALSE_UNLESS(node->weights_format() == luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+  RETURN_FALSE_UNLESS(bias != nullptr or no_bias != nullptr);
+
+  RETURN_FALSE_UNLESS(input->dtype() == loco::DataType::FLOAT32);
+  RETURN_FALSE_UNLESS(weights->dtype() == loco::DataType::FLOAT32);
+  if (bias)
+    RETURN_FALSE_UNLESS(bias->dtype() == loco::DataType::FLOAT32);
+
+  auto const input_elems = input->size<loco::DataType::FLOAT32>();
+
+  RETURN_FALSE_UNLESS(weights->rank() == 2);
+  auto const num_units = weights->dim(0).value();
+  auto const input_depth = weights->dim(1).value();
+
+  // Empty tensors cannot be folded: the modulo/division below would divide by zero
+  // and tensor_data() takes the address of element 0
+  RETURN_FALSE_UNLESS(input_elems != 0);
+  RETURN_FALSE_UNLESS(num_units != 0);
+  RETURN_FALSE_UNLESS(input_depth != 0);
+
+  RETURN_FALSE_UNLESS(input_elems % input_depth == 0);
+  auto const batch_size = input_elems / input_depth;
+
+  if (bias)
+    RETURN_FALSE_UNLESS(bias->size<loco::DataType::FLOAT32>() == num_units);
+
+  // The shape recorded on 'node' becomes the output shape handed to the kernel, so it
+  // has to describe exactly batch_size x num_units values with the units innermost.
+  // Checked before any node is created so that a rejection leaves no orphan constant.
+  RETURN_FALSE_UNLESS(node->rank() > 0);
+  RETURN_FALSE_UNLESS(node->dim(node->rank() - 1).value() == num_units);
+  uint32_t num_elem = 1;
+  for (uint32_t i = 0; i < node->rank(); ++i)
+    num_elem *= node->dim(i).value();
+  RETURN_FALSE_UNLESS(num_elem == batch_size * num_units);
+
+  tflite::FullyConnectedParams params{};
+  if (!set_kernel_parameters(&params, node))
+    return false; // Unsupported kernel parameter values
+
+  // Result constant: same name, dtype and shape as the node it replaces
+  auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+  {
+    constant->name(node->name());
+    constant->dtype(node->dtype());
+    constant->rank(node->rank());
+    constant->shape_status(luci::ShapeStatus::VALID);
+    for (uint32_t i = 0; i < node->rank(); ++i)
+      constant->dim(i).set(node->dim(i).value());
+    constant->size<loco::DataType::FLOAT32>(num_elem);
+  }
+
+  // Shape of 'node' as a tflite::RuntimeShape; a default-constructed shape for nullptr
+  auto tensor_shape = [](luci::CircleNode *node) {
+    if (node == nullptr)
+      return tflite::RuntimeShape();
+
+    tflite::RuntimeShape runtime_shape(node->rank());
+    for (uint32_t i = 0; i < node->rank(); ++i)
+      runtime_shape.SetDim(i, node->dim(i).value());
+    return runtime_shape;
+  };
+
+  // Pointer to the first FLOAT32 element of 'node'; nullptr for nullptr (absent bias)
+  auto tensor_data = [](luci::CircleConst *node) -> float * {
+    if (node == nullptr)
+      return nullptr;
+
+    return &node->at<loco::DataType::FLOAT32>(0);
+  };
+
+  tflite::reference_ops::FullyConnected(
+    params, tensor_shape(input), tensor_data(input), tensor_shape(weights), tensor_data(weights),
+    tensor_shape(bias), tensor_data(bias), tensor_shape(constant), tensor_data(constant));
+
+  loco::replace(node).with(constant);
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for FullyConnected Op
+ *
+ * Tries to fold every active node and returns true when at least one was folded.
+ **/
+bool FoldFullyConnectedPass::run(loco::Graph *g)
+{
+  bool changed = false;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    // Nodes that are not FullyConnected arrive as nullptr and are rejected by the callee
+    const bool folded = fold_fully_connected(dynamic_cast<CircleFullyConnected *>(node));
+    changed = changed or folded;
+  }
+
+  return changed;
+}
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldFullyConnectedPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <limits> // std::numeric_limits
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph has a FullyConnected Op with constant inputs
+ *
+ * BEFORE
+ *
+ *    [CircleConst] [CircleConst]
+ *               |   |
+ *       [CircleFullyConnected]
+ *
+ * AFTER
+ *
+ *           [CircleConst]
+ */
+class FoldFullyConnectedTest : public luci::ConstantFoldingTestGraph, public ::testing::Test
+{
+#define INPUT_DIM 80
+#define NUM_UNITS 32
+
+protected:
+  luci::CircleFullyConnected *_fc = nullptr;
+  luci::CircleConst *_fc_input = nullptr;
+  luci::CircleConst *_fc_weights = nullptr;
+  luci::CircleConst *_fc_bias = nullptr;
+
+public:
+  // Builds FullyConnected(input = ones[80], weights = ones[32, 80], bias = zeros[1, 32])
+  // and connects it to the graph output.
+  FoldFullyConnectedTest() : luci::ConstantFoldingTestGraph({INPUT_DIM}, loco::DataType::FLOAT32)
+  {
+    auto nodes = _g.nodes();
+    _fc = nodes->create<luci::CircleFullyConnected>();
+    _fc_input = nodes->create<luci::CircleConst>();
+    _fc_weights = nodes->create<luci::CircleConst>();
+    _fc_bias = nodes->create<luci::CircleConst>();
+
+    // Marks 'c' as FLOAT32 and sets all 'count' elements to 'value'
+    auto fill_f32 = [](luci::CircleConst *c, uint32_t count, float value) {
+      c->dtype(loco::DataType::FLOAT32);
+      c->size<loco::DataType::FLOAT32>(count);
+      for (uint32_t i = 0; i < count; ++i)
+        c->at<loco::DataType::FLOAT32>(i) = value;
+    };
+
+    _fc_input->shape({INPUT_DIM});
+    fill_f32(_fc_input, INPUT_DIM, 1.0f);
+
+    _fc_weights->shape({NUM_UNITS, INPUT_DIM});
+    fill_f32(_fc_weights, NUM_UNITS * INPUT_DIM, 1.0f);
+
+    _fc_bias->shape({1, NUM_UNITS});
+    fill_f32(_fc_bias, NUM_UNITS, 0.0f);
+
+    _fc->dtype(loco::DataType::FLOAT32);
+    _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _fc->input(_fc_input);
+    _fc->weights(_fc_weights);
+    _fc->bias(_fc_bias);
+    _fc->shape({NUM_UNITS});
+    _fc->weights_format(luci::CircleFullyConnected::WeightsFormat::DEFAULT);
+    _fc->keep_num_dims(true);
+
+    _output->from(_fc);
+  }
+
+protected:
+  // The whole graph is built in the constructor, so there is nothing left to do here
+  void init() final {}
+
+  // Unused by these tests
+  loco::Node *createFoldedPattern() final { return nullptr; }
+
+  // Whatever feeds the output; must_cast enforces that it is a CircleConst
+  luci::CircleConst *getFoldedPattern() final
+  {
+    return loco::must_cast<luci::CircleConst *>(_output->from());
+  }
+
+#undef INPUT_DIM
+#undef NUM_UNITS
+};
+
+} // namespace
+
+// Positive case: with constant input, weights and bias the FullyConnected is folded
+// into a single FLOAT32 constant of 32 elements, each equal to 80.
+TEST_F(FoldFullyConnectedTest, fold_fc)
+{
+  luci::FoldFullyConnectedPass pass;
+  ASSERT_TRUE(pass.run(&_g));
+
+  auto folded_const = getFoldedPattern();
+  // Expected value first, matching the other assertions in this file
+  EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+  EXPECT_EQ(1, folded_const->rank());
+  EXPECT_EQ(32, folded_const->dim(0));
+  EXPECT_EQ(32, folded_const->size<loco::DataType::FLOAT32>());
+  // NOTE(review): numeric_limits<float>::min() is the smallest positive normal float,
+  // so this tolerance is effectively an exact comparison - confirm that is intended
+  for (uint32_t i = 0; i < 32; ++i)
+    EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(i), 80,
+                std::numeric_limits<float>::min());
+}
+
+// Positive case: a CircleOutputExclude in the bias slot (no bias) is folded as well
+// and yields the same result as the all-zero bias.
+TEST_F(FoldFullyConnectedTest, fold_fc_no_bias)
+{
+  _fc->bias(_g.nodes()->create<luci::CircleOutputExclude>());
+
+  luci::FoldFullyConnectedPass pass;
+  ASSERT_TRUE(pass.run(&_g));
+
+  auto folded = getFoldedPattern();
+  EXPECT_EQ(loco::DataType::FLOAT32, folded->dtype());
+  EXPECT_EQ(1, folded->rank());
+  EXPECT_EQ(32, folded->dim(0));
+  EXPECT_EQ(32, folded->size<loco::DataType::FLOAT32>());
+
+  for (uint32_t idx = 0; idx < 32; ++idx)
+  {
+    EXPECT_NEAR(folded->at<loco::DataType::FLOAT32>(idx), 80, std::numeric_limits<float>::min());
+  }
+}
+
+// Negative case: when the input is another FullyConnected instead of a constant,
+// nothing can be folded and the pass reports no change.
+TEST_F(FoldFullyConnectedTest, fold_fc_NEG)
+{
+  auto non_const_input = _g.nodes()->create<luci::CircleFullyConnected>();
+  _fc->input(non_const_input);
+
+  luci::FoldFullyConnectedPass pass;
+  const bool changed = pass.run(&_g);
+
+  ASSERT_FALSE(changed);
+}
+
+// Negative case: a weights format other than DEFAULT is not folded.
+// (The extra, unconnected FullyConnected node created here before was unused.)
+TEST_F(FoldFullyConnectedTest, fold_fc_weight_format_NEG)
+{
+  _fc->weights_format(luci::CircleFullyConnected::WeightsFormat::SHUFFLED4x16INT8);
+
+  luci::FoldFullyConnectedPass pass;
+  ASSERT_FALSE(pass.run(&_g));
+}
return new_reshape;
}
+// Move a Reshape from the input side of Abs to its output side:
+//   tensor -> Reshape -> Abs   becomes   tensor -> Abs -> Reshape(clone)
+// Returns false, without changing the graph, when the Reshape cannot be cloned.
+bool forward_reshape(luci::CircleReshape *reshape, luci::CircleAbs *abs)
+{
+  assert(reshape != nullptr); // FIX_CALLER_UNLESS
+  assert(abs != nullptr);     // FIX_CALLER_UNLESS
+
+  auto cloned_reshape = create_cloned_reshape(reshape);
+  if (cloned_reshape == nullptr)
+    return false;
+
+  // Abs now reads the tensor that used to go through 'reshape'
+  abs->x(reshape->tensor());
+
+  // Redirect the consumers of Abs to the clone first, then feed the clone from Abs;
+  // the other order would make the clone consume itself
+  loco::replace(abs).with(cloned_reshape);
+  cloned_reshape->tensor(abs);
+
+  // Do shape inference for this node again.
+  abs->shape_status(luci::ShapeStatus::UNDEFINED);
+
+  return true;
+}
+
bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg)
{
assert(reshape != nullptr);
return false;
}
+  // Forward the Reshape feeding Abs, if there is one
+  bool visit(luci::CircleAbs *node)
+  {
+    if (auto reshape = as_reshape(node->x()))
+      return forward_reshape(reshape, node);
+
+    return false;
+  }
+
bool visit(luci::CircleNeg *node)
{
auto reshape = as_reshape(node->x());
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardTransposeOpPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Service/CircleNodeClone.h>
+
+using namespace luci;
+
+namespace
+{
+
+// Create a copy of 'transpose' together with a copy of its constant perm.
+// Both copies get the "_C" name suffix and inherit the origin of their source node.
+// Return nullptr if perm is not a CircleConst or if either clone fails.
+CircleTranspose *create_cloned_transpose(CircleTranspose *transpose)
+{
+  assert(transpose != nullptr); // FIX_CALLER_UNLESS
+
+  auto src_perm = dynamic_cast<CircleConst *>(transpose->perm());
+  if (src_perm == nullptr)
+    return nullptr;
+
+  // Clone perm first
+  CircleConst *perm_copy = clone(src_perm);
+  if (not perm_copy)
+    return nullptr;
+
+  perm_copy->name(src_perm->name() + "_C");
+  luci::add_origin(perm_copy, luci::get_origin(src_perm));
+
+  // Then clone the Transpose itself into the same graph
+  auto node_copy = clone_node(transpose, transpose->graph());
+  if (not node_copy)
+    return nullptr;
+
+  auto transpose_copy = loco::must_cast<luci::CircleTranspose *>(node_copy);
+  transpose_copy->perm(perm_copy);
+  transpose_copy->name(transpose->name() + "_C");
+  luci::add_origin(transpose_copy, luci::get_origin(transpose));
+
+  return transpose_copy;
+}
+
+// Flat offset of the element at 'indices' in a tensor of 'shape', where the last
+// dimension varies fastest. 'shape' and 'indices' must have the same length.
+uint32_t cal_offset(const std::vector<uint32_t> &shape, const std::vector<uint32_t> &indices)
+{
+  assert(shape.size() == indices.size()); // FIX_CALLER_UNLESS
+
+  // Horner form of sum(indices[i] * product(shape[i + 1 ...]))
+  uint32_t offset = 0;
+  for (uint32_t axis = 0; axis < indices.size(); ++axis)
+    offset = offset * shape[axis] + indices[axis];
+
+  return offset;
+}
+
+// Return reverse-transpose of 'node'
+// i.e., Transpose(return value) = node
+//
+// 't' is the permutation of that Transpose. The result is a clone of 'node' with
+// dimension t[i] set to node->dim(i) and the elements moved accordingly.
+// Only rank-4 constants are supported (asserted below).
+// NOTE(review): elements are read and written as FLOAT32 and node->dtype() is not
+// checked here - presumably callers only pass FLOAT32 constants; confirm
+CircleConst *reverse_transposed(CircleConst *node, std::vector<uint32_t> &t)
+{
+ assert(node->rank() == t.size()); // FIX_CALLER_UNLESS
+ assert(node->rank() == 4); // FIX_CALLER_UNLESS
+
+ // orig_shape: shape of 'node'; new_shape: shape of the returned constant
+ std::vector<uint32_t> orig_shape(node->rank());
+ std::vector<uint32_t> new_shape(node->rank());
+
+ for (uint32_t i = 0; i < node->rank(); i++)
+ {
+ assert(t[i] < node->rank()); // FIX_CALLER_UNLESS
+
+ orig_shape[i] = node->dim(i).value();
+ new_shape[t[i]] = node->dim(i).value();
+ }
+
+ auto clone_const = clone(node);
+ for (uint32_t i = 0; i < node->rank(); i++)
+ clone_const->dim(i).set(new_shape[i]);
+
+ clone_const->name(clone_const->name() + "_r_transposed");
+ add_origin(clone_const, luci::get_origin(node));
+
+ // Visit every element of the result; n/h/w/c are simply its four indices
+ for (uint32_t n = 0; n < clone_const->dim(0).value(); n++)
+ {
+ for (uint32_t h = 0; h < clone_const->dim(1).value(); h++)
+ {
+ for (uint32_t w = 0; w < clone_const->dim(2).value(); w++)
+ {
+ for (uint32_t c = 0; c < clone_const->dim(3).value(); c++)
+ {
+ std::vector<uint32_t> new_indices{n, h, w, c};
+ // Index i of the source element is index t[i] of the result element
+ std::vector<uint32_t> orig_indices{new_indices[t[0]], new_indices[t[1]],
+ new_indices[t[2]], new_indices[t[3]]};
+
+ const auto data = node->at<loco::DataType::FLOAT32>(cal_offset(orig_shape, orig_indices));
+ clone_const->at<loco::DataType::FLOAT32>(cal_offset(new_shape, new_indices)) = data;
+ }
+ }
+ }
+ }
+
+ return clone_const;
+}
+
+// Return true if 'c' has exactly four dimensions
+bool check_rank_four(const CircleConst *c)
+{
+  const auto rank = c->rank();
+  return rank == 4;
+}
+
+// Return true if below conditions are met
+// 1. t->perm() is CircleConst
+// 2. t->perm() is S32
+// 3. every value of t->perm() is in [0, t->rank())
+bool check_perm(const CircleTranspose *t)
+{
+  auto perm = dynamic_cast<CircleConst *>(t->perm());
+  if (perm == nullptr)
+    return false;
+
+  // TODO Support S64 data type
+  if (perm->dtype() != loco::DataType::S32)
+    return false;
+
+  const auto rank = static_cast<int32_t>(t->rank());
+  const auto num_axes = perm->size<loco::DataType::S32>();
+  for (uint32_t i = 0; i < num_axes; i++)
+  {
+    const auto axis = perm->at<loco::DataType::S32>(i);
+    // TODO Support not normalized index
+    if (axis < 0 or axis >= rank)
+      return false;
+  }
+
+  return true;
+}
+
+#define RETURN_FALSE_UNLESS(COND) \
+ if (not(COND)) \
+ return false;
+
+// Elementwise Binary Operator with const
+//
+// Matches Op(Transpose(a), Const) or Op(Const, Transpose(a)) and rewrites it to
+// Transpose'(Op(a, Const')), where Transpose' is a clone of the matched Transpose and
+// Const' is the reverse-transposed constant. visit() returns true when the graph changed.
+class EBOWithConstPattern final : public CircleNodeMutableVisitor<bool>
+{
+private:
+  // Shared implementation for ops exposing x()/y() accessors.
+  // Both replacement nodes are created before any edge is changed, so a failure
+  // returns false with the original connections intact.
+  template <typename CIRCLE_OP_PTR> bool has_pattern(CIRCLE_OP_PTR node)
+  {
+    // Case 1: x is the constant, y is the Transpose
+    if (auto x = dynamic_cast<luci::CircleConst *>(node->x()))
+    {
+      if (auto y = dynamic_cast<luci::CircleTranspose *>(node->y()))
+      {
+        RETURN_FALSE_UNLESS(check_rank_four(x));
+        RETURN_FALSE_UNLESS(check_perm(y));
+
+        // Checked at run time (not only by assert) so release builds never dereference nullptr
+        auto new_const = gen_new_const(y, x);
+        RETURN_FALSE_UNLESS(new_const != nullptr);
+
+        auto new_transpose = create_cloned_transpose(y);
+        RETURN_FALSE_UNLESS(new_transpose != nullptr);
+
+        // Reconnect network
+        node->x(new_const);
+        node->y(y->a());
+        loco::replace(node).with(new_transpose);
+        new_transpose->a(node);
+
+        // Do shape inference for this node again.
+        node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+        return true;
+      }
+    }
+
+    // Case 2: y is the constant, x is the Transpose
+    if (auto y = dynamic_cast<luci::CircleConst *>(node->y()))
+    {
+      if (auto x = dynamic_cast<luci::CircleTranspose *>(node->x()))
+      {
+        RETURN_FALSE_UNLESS(check_rank_four(y));
+        RETURN_FALSE_UNLESS(check_perm(x));
+
+        auto new_const = gen_new_const(x, y);
+        RETURN_FALSE_UNLESS(new_const != nullptr);
+
+        auto new_transpose = create_cloned_transpose(x);
+        RETURN_FALSE_UNLESS(new_transpose != nullptr);
+
+        // Reconnect network
+        node->y(new_const);
+        node->x(x->a());
+        loco::replace(node).with(new_transpose);
+        new_transpose->a(node);
+
+        // Do shape inference for this node again.
+        node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+public:
+  // Default
+  bool visit(luci::CircleNode *) { return false; }
+
+  bool visit(luci::CircleAdd *node) { return has_pattern(node); }
+
+  bool visit(luci::CircleMul *node) { return has_pattern(node); }
+
+private:
+  // Return a new const node after Transpose Op is forwarded
+  // Return nullptr if unsupported cases:
+  //  - perm of 't' is not an S32 CircleConst
+  //  - a perm value is outside [0, t->rank())
+  //  - the number of perm values differs from the rank of 't' or of 'c'
+  //  - 'c' is not FLOAT32 (reverse_transposed() copies elements as FLOAT32)
+  CircleConst *gen_new_const(CircleTranspose *t, CircleConst *c)
+  {
+    const auto perm = dynamic_cast<CircleConst *>(t->perm());
+
+    // Only support constant perm
+    if (not perm)
+      return nullptr;
+
+    // Only support FLOAT32 constants
+    if (c->dtype() != loco::DataType::FLOAT32)
+      return nullptr;
+
+    const auto rank = static_cast<int32_t>(t->rank());
+
+    std::vector<uint32_t> perm_data;
+    switch (perm->dtype())
+    {
+      case loco::DataType::S32:
+        for (uint32_t i = 0; i < perm->size<loco::DataType::S32>(); i++)
+        {
+          auto data = perm->at<loco::DataType::S32>(i);
+          if (data < 0 or data >= rank)
+            return nullptr;
+          perm_data.emplace_back(static_cast<uint32_t>(data));
+        }
+        break;
+      // TODO Support S64 data type
+      default:
+        return nullptr;
+    }
+
+    // reverse_transposed() indexes perm_data once per dimension of 'c'
+    if (perm_data.size() != t->rank() or perm_data.size() != c->rank())
+      return nullptr;
+
+    return reverse_transposed(c, perm_data);
+  }
+};
+
+// Elementwise Unary Operator
+//
+// Matches a unary op fed by a CircleTranspose and moves the Transpose behind it.
+class EwUnaryPattern final : public CircleNodeMutableVisitor<bool>
+{
+public:
+  // Default: nodes without a dedicated overload are left untouched
+  bool visit(luci::CircleNode *) { return false; }
+
+  bool visit(luci::CircleAbs *node) { return has_pattern_x(node); }
+
+private:
+  // For ops whose single input is named 'x'
+  template <typename CIRCLE_OP_PTR> bool has_pattern_x(CIRCLE_OP_PTR node)
+  {
+    auto transpose = dynamic_cast<luci::CircleTranspose *>(node->x());
+    if (transpose == nullptr)
+      return false;
+
+    RETURN_FALSE_UNLESS(check_perm(transpose));
+
+    auto new_transpose = create_cloned_transpose(transpose);
+    assert(new_transpose); // FIX_ME_UNLESS
+
+    // Reconnect network: the op reads the Transpose's input, the clone reads the op
+    node->x(transpose->a());
+    loco::replace(node).with(new_transpose);
+    new_transpose->a(node);
+
+    // Do shape inference for this node again.
+    node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+    return true;
+  }
+};
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ * |
+ * [CircleNode] [CircleConst]
+ * | /
+ * [CircleTranspose] [CircleConst]
+ * / | /
+ * [CircleNode] [(BinaryOp)]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * BinaryOp: CircleAdd, CircleMul, ...
+ *
+ * |
+ * [CircleNode] [CircleConst]
+ * | /
+ * [CircleTranspose]
+ * / |
+ * [CircleNode] [(UnaryOp)]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * UnaryOp: CircleAbs, ...
+ *
+ * AFTER
+ * |
+ * [CircleConst] [CircleNode] [CircleConst(updated)]
+ * | / | /
+ * [CircleTranspose] [(BinaryOp)] [CircleConst]
+ * | | /
+ * [CircleNode] [CircleTranspose]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * |
+ * [CircleConst] [CircleNode]
+ * | / |
+ * [CircleTranspose] [(UnaryOp)] [CircleConst]
+ * | | /
+ * [CircleNode] [CircleTranspose]
+ * | | \
+ * | | [CircleNode]
+ * | | |
+ *
+ * Note: new [CircleTranspose] is added after [(BinaryOp)] or [(UnaryOp)]
+ */
+bool ForwardTransposeOpPass::run(loco::Graph *g)
+{
+  EBOWithConstPattern binary_pattern;
+  EwUnaryPattern unary_pattern;
+
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+
+    // The unary pattern is tried only when the binary pattern did not apply
+    if (circle_node->accept(&binary_pattern) or circle_node->accept(&unary_pattern))
+      changed = true;
+  }
+
+  return changed;
+}
+
+#undef RETURN_FALSE_UNLESS
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForwardTransposeOpPass.h"
+#include "luci/Pass/CircleShapeInferencePass.h"
+
+#include <logo/Phase.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+namespace
+{
+
+using namespace luci::test;
+
+// Test helper owning the nodes of a "Transpose -> binary op (with const)" sub-graph.
+// init() only creates and fills the nodes; wiring them is left to the derived graph.
+template <typename T> class TransposeBinaryOpGraphlet
+{
+public:
+  TransposeBinaryOpGraphlet() = default;
+  virtual ~TransposeBinaryOpGraphlet() = default;
+
+  // Create perm/const/transpose/binary nodes in 'g'.
+  // 'perm' is stored as the S32 permutation constant; the FLOAT32 constant gets the
+  // permuted 'shape_in' as its shape and the values 0, 1, 2, ...
+  void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 perm)
+  {
+    const std::vector<uint32_t> dims = shape_in;
+    const std::vector<uint32_t> axes = perm;
+
+    assert(dims.size() == axes.size()); // FIX_CALLER_UNLESS
+
+    const auto rank = axes.size();
+
+    _perm = g->nodes()->create<luci::CircleConst>();
+    _const = g->nodes()->create<luci::CircleConst>();
+    _transpose = g->nodes()->create<luci::CircleTranspose>();
+    _binary = g->nodes()->create<T>();
+
+    // permutation constant: 1-D S32 tensor holding 'perm'
+    _perm->dtype(loco::DataType::S32);
+    _perm->rank(1);
+    _perm->dim(0).set(rank);
+    _perm->shape_status(luci::ShapeStatus::VALID);
+    _perm->size<loco::DataType::S32>(rank);
+    for (uint32_t idx = 0; idx < rank; ++idx)
+      _perm->at<loco::DataType::S32>(idx) = axes[idx];
+
+    // second operand of the binary op: shape is 'shape_in' permuted by 'perm'
+    _const->dtype(loco::DataType::FLOAT32);
+    _const->rank(dims.size());
+    for (uint32_t idx = 0; idx < dims.size(); ++idx)
+      _const->dim(idx).set(dims[axes[idx]]);
+    _const->shape_status(luci::ShapeStatus::VALID);
+
+    uint32_t num_elems = 1;
+    for (uint32_t idx = 0; idx < rank; ++idx)
+      num_elems *= dims[idx];
+
+    _const->size<loco::DataType::FLOAT32>(num_elems);
+    for (uint32_t idx = 0; idx < num_elems; ++idx)
+      _const->at<loco::DataType::FLOAT32>(idx) = idx;
+
+    _perm->name("transpose_perm");
+    _transpose->name("transpose");
+    _binary->name("binary");
+  }
+
+  luci::CircleTranspose *transpose(void) { return _transpose; }
+
+  // Exchange the two operands of the binary op
+  void switch_xy(void)
+  {
+    assert(_binary); // FIX_CALLER_UNLESS
+    auto old_x = _binary->x();
+    auto old_y = _binary->y();
+    _binary->x(old_y);
+    _binary->y(old_x);
+  }
+
+protected:
+  luci::CircleTranspose *_transpose = nullptr;
+  T *_binary = nullptr;
+  luci::CircleConst *_perm = nullptr;
+  luci::CircleConst *_const = nullptr;
+};
+
+using TransposeAddGraphlet = TransposeBinaryOpGraphlet<luci::CircleAdd>;
+using TransposeMulGraphlet = TransposeBinaryOpGraphlet<luci::CircleMul>;
+
+// input -> Transpose -> Add(x), Const -> Add(y), Add -> output
+class ForwardTransposeToAddGraph : public TestIOGraph, public TransposeAddGraphlet
+{
+public:
+  // NOTE 'shape_out' is handed to the graphlet as the Transpose permutation as well
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    TransposeAddGraphlet::init(g(), shape_in, shape_out);
+
+    // wire the binary op first, then its producer
+    _binary->y(_const);
+    _binary->x(_transpose);
+    _transpose->perm(_perm);
+    _transpose->a(input());
+
+    output()->from(_binary);
+  }
+};
+
+// Same as the Add graph, except that both Add operands read the Transpose,
+// i.e. there is no constant operand.
+class ForwardTransposeToAddInvalidGraph : public TestIOGraph, public TransposeAddGraphlet
+{
+public:
+  // NOTE 'shape_out' is handed to the graphlet as the Transpose permutation as well
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    TransposeAddGraphlet::init(g(), shape_in, shape_out);
+
+    // wire the binary op first, then its producer
+    _binary->y(_transpose);
+    _binary->x(_transpose);
+    _transpose->perm(_perm);
+    _transpose->a(input());
+
+    output()->from(_binary);
+  }
+};
+
+// input -> Transpose -> Mul(x), Const -> Mul(y), Mul -> output
+class ForwardTransposeToMulGraph : public TestIOGraph, public TransposeMulGraphlet
+{
+public:
+  // NOTE 'shape_out' is handed to the graphlet as the Transpose permutation as well
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    TransposeMulGraphlet::init(g(), shape_in, shape_out);
+
+    // wire the binary op first, then its producer
+    _binary->y(_const);
+    _binary->x(_transpose);
+    _transpose->perm(_perm);
+    _transpose->a(input());
+
+    output()->from(_binary);
+  }
+};
+
+// Run shape inference together with the pass under test on 'g',
+// using the Restart phase strategy.
+void run_phase(loco::Graph *g)
+{
+  logo::Phase passes;
+
+  // Default pass first, then the pass to test
+  passes.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+  passes.emplace_back(std::make_unique<luci::ForwardTransposeOpPass>());
+
+  logo::PhaseRunner<logo::PhaseStrategy::Restart> runner{g};
+  runner.run(passes);
+}
+
+// Fixture holding a Transpose -> Add(const) graph
+class ForwardTransposeToAddGraphTest : public ::testing::Test
+{
+protected:
+  ForwardTransposeToAddGraph _graph;
+
+public:
+  // Apply shape inference and the pass under test to the fixture graph
+  void run_pass(void)
+  {
+    run_phase(_graph.g());
+  }
+};
+
+// Fixture holding the Add graph that has no constant operand
+class ForwardTransposeToAddGraphNegTest : public ::testing::Test
+{
+protected:
+  ForwardTransposeToAddInvalidGraph _graph;
+
+public:
+  // Apply shape inference and the pass under test to the fixture graph
+  void run_pass(void)
+  {
+    run_phase(_graph.g());
+  }
+};
+
+// Fixture holding a Transpose -> Mul(const) graph
+class ForwardTransposeToMulGraphTest : public ::testing::Test
+{
+protected:
+  ForwardTransposeToMulGraph _graph;
+
+public:
+  // Apply shape inference and the pass under test to the fixture graph
+  void run_pass(void)
+  {
+    run_phase(_graph.g());
+  }
+};
+
+} // namespace
+
+// Add(x = Transpose, y = Const): the Transpose must end up behind the Add.
+TEST_F(ForwardTransposeToAddGraphTest, forward_add_xy)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  run_pass();
+
+  // ASSERT (not EXPECT) on each cast: the following lines dereference the pointer,
+  // so a failed cast must stop the test instead of crashing the test binary.
+  auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+  ASSERT_NE(nullptr, transpose);
+  EXPECT_EQ(4, transpose->rank());
+  EXPECT_EQ(1, transpose->dim(0).value());
+  EXPECT_EQ(1, transpose->dim(1).value());
+  EXPECT_EQ(51, transpose->dim(2).value());
+  EXPECT_EQ(64, transpose->dim(3).value());
+
+  auto add = dynamic_cast<luci::CircleAdd *>(transpose->a());
+  ASSERT_NE(nullptr, add);
+  EXPECT_EQ(4, add->rank());
+  EXPECT_EQ(1, add->dim(0).value());
+  EXPECT_EQ(64, add->dim(1).value());
+  EXPECT_EQ(51, add->dim(2).value());
+  EXPECT_EQ(1, add->dim(3).value());
+
+  auto add_const = dynamic_cast<luci::CircleConst *>(add->y());
+  ASSERT_NE(nullptr, add_const);
+  EXPECT_EQ(4, add_const->rank());
+  EXPECT_EQ(1, add_const->dim(0).value());
+  EXPECT_EQ(64, add_const->dim(1).value());
+  EXPECT_EQ(51, add_const->dim(2).value());
+  EXPECT_EQ(1, add_const->dim(3).value());
+}
+
+// Add(x = Const, y = Transpose): same expectation with the operands swapped.
+TEST_F(ForwardTransposeToAddGraphTest, forward_add_yx)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+  _graph.switch_xy();
+
+  run_pass();
+
+  // ASSERT (not EXPECT) on each cast: the following lines dereference the pointer,
+  // so a failed cast must stop the test instead of crashing the test binary.
+  auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+  ASSERT_NE(nullptr, transpose);
+  EXPECT_EQ(4, transpose->rank());
+  EXPECT_EQ(1, transpose->dim(0).value());
+  EXPECT_EQ(1, transpose->dim(1).value());
+  EXPECT_EQ(51, transpose->dim(2).value());
+  EXPECT_EQ(64, transpose->dim(3).value());
+
+  auto add = dynamic_cast<luci::CircleAdd *>(transpose->a());
+  ASSERT_NE(nullptr, add);
+  EXPECT_EQ(4, add->rank());
+  EXPECT_EQ(1, add->dim(0).value());
+  EXPECT_EQ(64, add->dim(1).value());
+  EXPECT_EQ(51, add->dim(2).value());
+  EXPECT_EQ(1, add->dim(3).value());
+
+  // After switch_xy() the constant is the x operand
+  auto add_const = dynamic_cast<luci::CircleConst *>(add->x());
+  ASSERT_NE(nullptr, add_const);
+  EXPECT_EQ(4, add_const->rank());
+  EXPECT_EQ(1, add_const->dim(0).value());
+  EXPECT_EQ(64, add_const->dim(1).value());
+  EXPECT_EQ(51, add_const->dim(2).value());
+  EXPECT_EQ(1, add_const->dim(3).value());
+}
+
+// Mul(x = Transpose, y = Const): the Transpose must end up behind the Mul.
+TEST_F(ForwardTransposeToMulGraphTest, forward_mul_xy)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  run_pass();
+
+  // ASSERT (not EXPECT) on each cast: the following lines dereference the pointer,
+  // so a failed cast must stop the test instead of crashing the test binary.
+  auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+  ASSERT_NE(nullptr, transpose);
+  EXPECT_EQ(4, transpose->rank());
+  EXPECT_EQ(1, transpose->dim(0).value());
+  EXPECT_EQ(1, transpose->dim(1).value());
+  EXPECT_EQ(51, transpose->dim(2).value());
+  EXPECT_EQ(64, transpose->dim(3).value());
+
+  auto mul = dynamic_cast<luci::CircleMul *>(transpose->a());
+  ASSERT_NE(nullptr, mul);
+  EXPECT_EQ(4, mul->rank());
+  EXPECT_EQ(1, mul->dim(0).value());
+  EXPECT_EQ(64, mul->dim(1).value());
+  EXPECT_EQ(51, mul->dim(2).value());
+  EXPECT_EQ(1, mul->dim(3).value());
+
+  auto mul_const = dynamic_cast<luci::CircleConst *>(mul->y());
+  ASSERT_NE(nullptr, mul_const);
+  EXPECT_EQ(4, mul_const->rank());
+  EXPECT_EQ(1, mul_const->dim(0).value());
+  EXPECT_EQ(64, mul_const->dim(1).value());
+  EXPECT_EQ(51, mul_const->dim(2).value());
+  EXPECT_EQ(1, mul_const->dim(3).value());
+}
+
+// Mul(x = Const, y = Transpose): same expectation with the operands swapped.
+TEST_F(ForwardTransposeToMulGraphTest, forward_mul_yx)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+  _graph.switch_xy();
+
+  run_pass();
+
+  // ASSERT (not EXPECT) on each cast: the following lines dereference the pointer,
+  // so a failed cast must stop the test instead of crashing the test binary.
+  auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+  ASSERT_NE(nullptr, transpose);
+  EXPECT_EQ(4, transpose->rank());
+  EXPECT_EQ(1, transpose->dim(0).value());
+  EXPECT_EQ(1, transpose->dim(1).value());
+  EXPECT_EQ(51, transpose->dim(2).value());
+  EXPECT_EQ(64, transpose->dim(3).value());
+
+  auto mul = dynamic_cast<luci::CircleMul *>(transpose->a());
+  ASSERT_NE(nullptr, mul);
+  EXPECT_EQ(4, mul->rank());
+  EXPECT_EQ(1, mul->dim(0).value());
+  EXPECT_EQ(64, mul->dim(1).value());
+  EXPECT_EQ(51, mul->dim(2).value());
+  EXPECT_EQ(1, mul->dim(3).value());
+
+  // After switch_xy() the constant is the x operand
+  auto mul_const = dynamic_cast<luci::CircleConst *>(mul->x());
+  ASSERT_NE(nullptr, mul_const);
+  EXPECT_EQ(4, mul_const->rank());
+  EXPECT_EQ(1, mul_const->dim(0).value());
+  EXPECT_EQ(64, mul_const->dim(1).value());
+  EXPECT_EQ(51, mul_const->dim(2).value());
+  EXPECT_EQ(1, mul_const->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToAddGraphTest, forward_transpose_add_NEG)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  // Bypass Add: the output now reads the Transpose directly
+  auto transpose = _graph.transpose();
+  _graph.output()->from(transpose);
+
+  luci::ForwardTransposeOpPass pass;
+  const bool changed = pass.run(_graph.g());
+  EXPECT_FALSE(changed);
+}
+
+TEST_F(ForwardTransposeToAddGraphNegTest, forward_transpose_add_non_const_NEG)
+{
+  // Neither operand of Add is a constant in this graph
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  luci::ForwardTransposeOpPass pass;
+  const bool changed = pass.run(_graph.g());
+  EXPECT_FALSE(changed);
+}
+
+TEST_F(ForwardTransposeToMulGraphTest, forward_transpose_mul_NEG)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  // Bypass Mul: the output now reads the Transpose directly
+  auto transpose = _graph.transpose();
+  _graph.output()->from(transpose);
+
+  luci::ForwardTransposeOpPass pass;
+  const bool changed = pass.run(_graph.g());
+  EXPECT_FALSE(changed);
+}
+
+// Unary
+
+namespace
+{
+
+// Test helper owning the nodes of a "Transpose -> unary op" sub-graph.
+// init() only creates and fills the nodes; wiring them is left to the derived graph.
+template <typename T> class TransposeUnaryOpGraphlet
+{
+public:
+  TransposeUnaryOpGraphlet() = default;
+  virtual ~TransposeUnaryOpGraphlet() = default;
+
+  // Create perm/const/transpose/unary nodes in 'g'.
+  // 'perm' is stored as the S32 permutation constant; the FLOAT32 constant gets the
+  // permuted 'shape_in' as its shape and the values 0, 1, 2, ...
+  void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 perm)
+  {
+    const std::vector<uint32_t> dims = shape_in;
+    const std::vector<uint32_t> axes = perm;
+
+    assert(dims.size() == axes.size()); // FIX_CALLER_UNLESS
+
+    const auto rank = axes.size();
+
+    _perm = g->nodes()->create<luci::CircleConst>();
+    _const = g->nodes()->create<luci::CircleConst>();
+    _transpose = g->nodes()->create<luci::CircleTranspose>();
+    _unary = g->nodes()->create<T>();
+
+    // permutation constant: 1-D S32 tensor holding 'perm'
+    _perm->dtype(loco::DataType::S32);
+    _perm->rank(1);
+    _perm->dim(0).set(rank);
+    _perm->shape_status(luci::ShapeStatus::VALID);
+    _perm->size<loco::DataType::S32>(rank);
+    for (uint32_t idx = 0; idx < rank; ++idx)
+      _perm->at<loco::DataType::S32>(idx) = axes[idx];
+
+    // FLOAT32 constant: shape is 'shape_in' permuted by 'perm'
+    _const->dtype(loco::DataType::FLOAT32);
+    _const->rank(dims.size());
+    for (uint32_t idx = 0; idx < dims.size(); ++idx)
+      _const->dim(idx).set(dims[axes[idx]]);
+    _const->shape_status(luci::ShapeStatus::VALID);
+
+    uint32_t num_elems = 1;
+    for (uint32_t idx = 0; idx < rank; ++idx)
+      num_elems *= dims[idx];
+
+    _const->size<loco::DataType::FLOAT32>(num_elems);
+    for (uint32_t idx = 0; idx < num_elems; ++idx)
+      _const->at<loco::DataType::FLOAT32>(idx) = idx;
+
+    _perm->name("transpose_perm");
+    _transpose->name("transpose");
+    _unary->name("_unary");
+  }
+
+  luci::CircleTranspose *transpose(void) { return _transpose; }
+
+protected:
+  luci::CircleTranspose *_transpose = nullptr;
+  T *_unary = nullptr;
+  luci::CircleConst *_perm = nullptr;
+  luci::CircleConst *_const = nullptr;
+};
+
+using TransposeAbsGraphlet = TransposeUnaryOpGraphlet<luci::CircleAbs>;
+
+// input -> Transpose -> Abs -> output
+class ForwardTransposeToAbsGraph : public TestIOGraph, public TransposeAbsGraphlet
+{
+public:
+  // NOTE 'shape_out' is handed to the graphlet as the Transpose permutation as well
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    TransposeAbsGraphlet::init(g(), shape_in, shape_out);
+
+    // wire the unary op first, then its producer
+    _unary->x(_transpose);
+    _transpose->perm(_perm);
+    _transpose->a(input());
+
+    output()->from(_unary);
+  }
+};
+
+// input -> Relu -> Abs -> output; Abs is not fed by a Transpose here
+class ForwardTransposeToAbsInvalidGraph : public TestIOGraph, public TransposeAbsGraphlet
+{
+protected:
+  luci::CircleRelu *_relu = nullptr;
+
+public:
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    TransposeAbsGraphlet::init(g(), shape_in, shape_out);
+
+    // Relu takes the place of the Transpose in front of Abs
+    _relu = g()->nodes()->create<luci::CircleRelu>();
+    _relu->name("relu");
+    _relu->dtype(loco::DataType::FLOAT32);
+
+    // connect network
+    _unary->x(_relu);
+    _relu->features(input());
+
+    output()->from(_unary);
+  }
+};
+
+// Fixture holding a Transpose -> Abs graph
+class ForwardTransposeToAbsGraphTest : public ::testing::Test
+{
+protected:
+  ForwardTransposeToAbsGraph _graph;
+
+public:
+  // Apply shape inference and the pass under test to the fixture graph
+  void run_pass(void)
+  {
+    run_phase(_graph.g());
+  }
+};
+
+// Fixture holding the Relu -> Abs graph (no Transpose in front of Abs)
+class ForwardTransposeToAbsGraphNegTest : public ::testing::Test
+{
+protected:
+  ForwardTransposeToAbsInvalidGraph _graph;
+
+public:
+  // Apply shape inference and the pass under test to the fixture graph
+  void run_pass(void)
+  {
+    run_phase(_graph.g());
+  }
+};
+
+} // namespace
+
+// Abs(x = Transpose): the Transpose must end up behind the Abs.
+TEST_F(ForwardTransposeToAbsGraphTest, forward_abs_x)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  run_pass();
+
+  // ASSERT (not EXPECT) on each cast: the following lines dereference the pointer,
+  // so a failed cast must stop the test instead of crashing the test binary.
+  auto transpose = dynamic_cast<luci::CircleTranspose *>(_graph.output()->from());
+  ASSERT_NE(nullptr, transpose);
+  EXPECT_EQ(4, transpose->rank());
+  EXPECT_EQ(1, transpose->dim(0).value());
+  EXPECT_EQ(1, transpose->dim(1).value());
+  EXPECT_EQ(51, transpose->dim(2).value());
+  EXPECT_EQ(64, transpose->dim(3).value());
+
+  auto abs = dynamic_cast<luci::CircleAbs *>(transpose->a());
+  ASSERT_NE(nullptr, abs);
+  EXPECT_EQ(4, abs->rank());
+  EXPECT_EQ(1, abs->dim(0).value());
+  EXPECT_EQ(64, abs->dim(1).value());
+  EXPECT_EQ(51, abs->dim(2).value());
+  EXPECT_EQ(1, abs->dim(3).value());
+}
+
+TEST_F(ForwardTransposeToAbsGraphTest, forward_transpose_abs_NEG)
+{
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  // Bypass Abs: the output now reads the Transpose directly
+  auto transpose = _graph.transpose();
+  _graph.output()->from(transpose);
+
+  luci::ForwardTransposeOpPass pass;
+  const bool changed = pass.run(_graph.g());
+  EXPECT_FALSE(changed);
+}
+
+TEST_F(ForwardTransposeToAbsGraphNegTest, forward_transpose_abs_non_transpose_NEG)
+{
+  // Abs is fed by Relu instead of Transpose in this graph
+  _graph.init({1, 64, 51, 1}, {0, 3, 2, 1});
+
+  luci::ForwardTransposeOpPass pass;
+  const bool changed = pass.run(_graph.g());
+  EXPECT_FALSE(changed);
+}
if (not(addition->dim(rank - 1) == weights->dim(0)))
return false;
+ auto bias = loco::must_cast<luci::CircleNode *>(fc->bias());
+
+ // We only support (1) constant bias (2) no bias
+ // If bias is neither (1) nor (2), it would be a feature map
+ if (bias->opcode() != luci::CircleOpcode::CIRCLECONST and
+ bias->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+ return false;
+
auto fused_bias = luci::clone(addition);
// Add existing bias values
public:
luci::CircleFullyConnected *fc() { return _fc; }
+public:
+ void to_fm_bias(void)
+ {
+   assert(_fc != nullptr); // FIX_ME_UNLESS
+
+   // Use a freshly created FullyConnected node as bias, so that the bias is
+   // neither a constant nor an output-exclude node
+   auto fm_bias = _fc->graph()->nodes()->create<luci::CircleFullyConnected>();
+   _fc->bias(fm_bias);
+ }
+
protected:
luci::CircleFullyConnected *_fc = nullptr;
luci::CircleAdd *_add = nullptr;
EXPECT_EQ(i, bias->at<loco::DataType::FLOAT32>(i));
}
}
+
+TEST_F(FuseAddWithFullyConnectedPassTest, fm_bias_NEG)
+{
+  g.init();
+
+  // With a feature-map bias the pass must leave Add unfused
+  g.to_fm_bias();
+
+  const bool changed = pass.run(g.g());
+  EXPECT_EQ(false, changed);
+}
if (output_node->index() == 0 || (int)output_node->index() > original_output_cnt)
{
auto noOp = main_graph->nodes()->create<luci::CircleOutputExclude>();
- noOp->dtype(loco::DataType::FLOAT32); // TODO Remove this setting
output_node->from(noOp);
changed = true;
}
namespace
{
+
+// Create an activation node of type CIRCLENODE that reads 'feature', give it
+// 'relu_name' and the origin of 'target', then substitute it for 'target'.
+template <class CIRCLENODE>
+void replace_with_relu(luci::CircleNode *target, luci::CircleNode *feature,
+                       const std::string &relu_name)
+{
+  assert(target != nullptr);
+  assert(feature != nullptr);
+
+  auto activation = target->graph()->nodes()->create<CIRCLENODE>();
+  activation->name(relu_name);
+  activation->features(feature);
+  luci::add_origin(activation, luci::get_origin(target));
+
+  replace(target).with(activation);
+}
+
+} // namespace
+
+namespace
+{
/**
* Fuse Mul-Add to TransposeConv if possible.
*
* | / / | /
* [CircleTransposeConv] [CircleAdd]
* |
- * ([CircleRelu6])
+ * ([CircleRelu]/[CircleRelu6])
* |
*
- * Note: CircleRelu6 is inserted if Add activation is ReLU6
+ * Note: CircleRelu or CircleRelu6 is inserted if Add activation is ReLU/ReLU6
*/
bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
{
if (add->dtype() != loco::DataType::FLOAT32)
return false;
if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
- add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU6 &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU)
return false;
// tconv bias is optional
luci::add_origin(fused_tconv, luci::get_origin(bias));
}
- if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
+ switch (add->fusedActivationFunction())
{
- // separate relu op from add op
- auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
- relu->features(fused_tconv);
- relu->name(name + "/Relu6");
- luci::add_origin(relu, luci::get_origin(add));
+ case luci::FusedActFunc::RELU6:
+ replace_with_relu<luci::CircleRelu6>(add, fused_tconv, name + "/Relu6");
+ break;
- replace(add).with(relu);
- }
- else
- {
- replace(add).with(fused_tconv);
+ case luci::FusedActFunc::RELU:
+ replace_with_relu<luci::CircleRelu>(add, fused_tconv, name + "/Relu");
+ break;
+
+ case luci::FusedActFunc::NONE:
+ replace(add).with(fused_tconv);
+ break;
+
+ default:
+ assert(false);
+ break;
}
return true;
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FusePReluPass.h"
+#include "helpers/NodeFiller.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/CircleNodeClone.h>
+
+#include <cassert>
+
+// Helper to fuse PRelu
+namespace
+{
+
+/**
+ * Below diagram shows PRelu pattern to fuse.
+ * - this pattern will be replaced with one PRelu
+ *
+ * [In]
+ * |
+ * V
+ * +---- ifm ----+
+ * | | |
+ * | | V
+ * | | abs
+ * | V |
+ * | sub <---+
+ * | |
+ * | V
+ * | mul_alpha (alpha of PRelu)
+ * | |
+ * V V
+ * relu mul_half (0.5)
+ * | |
+ * | V
+ * +---> add
+ * |
+ * V
+ * [Out]
+ *
+ */
+// Holds the nodes of one PRelu candidate, anchored at its final Add.
+// All members except _add_ofm stay nullptr until matched() fills them.
+class PReluPattern final
+{
+public:
+  PReluPattern(luci::CircleAdd *candidate) : _add_ofm(candidate) { assert(candidate); }
+
+  // Return true when the nodes feeding _add_ofm form the PRelu pattern
+  bool matched();
+
+public:
+  luci::CircleNode *_ifm = nullptr;
+  luci::CircleRelu *_relu = nullptr;
+  luci::CircleAbs *_abs = nullptr;
+  luci::CircleSub *_sub = nullptr;
+  luci::CircleMul *_mul_alpha = nullptr;
+  luci::CircleMul *_mul_half = nullptr;
+  luci::CircleAdd *_add_ofm = nullptr;
+  luci::CircleConst *_const_alpha = nullptr;
+  luci::CircleConst *_const_half = nullptr;
+};
+
+#define CHECK_OR_FALSE(condition) \
+ if (not(condition)) \
+ return false;
+
+bool PReluPattern::matched()
+{
+ // check pattern: walk back from _add_ofm and fill each member; any mismatch returns false
+ CHECK_OR_FALSE(luci::fill(&_relu, &_mul_half).with_commutative_args_of(_add_ofm));
+ CHECK_OR_FALSE(luci::fill(&_mul_alpha, &_const_half).with_commutative_args_of(_mul_half));
+ CHECK_OR_FALSE(luci::fill(&_sub, &_const_alpha).with_commutative_args_of(_mul_alpha));
+
+ CHECK_OR_FALSE(luci::fill(&_ifm, &_abs).with_args_of(_sub)); // non-commutative fill: presumably requires sub = ifm - abs in this order
+
+ CHECK_OR_FALSE(_relu->features() == _ifm); // relu must read the same ifm as sub
+ CHECK_OR_FALSE(_abs->x() == _ifm); // abs must read the same ifm as sub
+
+ // Check Activation to be NONE on sub, both mul nodes and the final add
+ CHECK_OR_FALSE(_sub->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_mul_alpha->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_mul_half->fusedActivationFunction() == luci::FusedActFunc::NONE);
+ CHECK_OR_FALSE(_add_ofm->fusedActivationFunction() == luci::FusedActFunc::NONE);
+
+ // TODO support other types?
+ // check if _const_half is really a single FLOAT32 element equal to 0.5
+ CHECK_OR_FALSE(_const_half->dtype() == loco::DataType::FLOAT32);
+ CHECK_OR_FALSE(_const_half->size<loco::DataType::FLOAT32>() == 1);
+ CHECK_OR_FALSE(_const_half->at<loco::DataType::FLOAT32>(0) == 0.5);
+
+ // check _const_alpha condition (only its FLOAT32 dtype is verified here)
+ CHECK_OR_FALSE(_const_alpha->dtype() == loco::DataType::FLOAT32);
+ // TODO add more if needed
+
+ return true;
+}
+
+#undef CHECK_OR_FALSE
+
+// Replaces a matched PReluPattern with a single CirclePRelu node.
+// Only a reference to the pattern is kept, so the pattern must outlive this object.
+class FusePRelu final
+{
+public:
+  FusePRelu(const PReluPattern &p) : _p(p) {}
+
+  // Create the PRelu node and substitute it for the pattern's final Add
+  void apply(void);
+
+private:
+  // Create a PRelu node in 'graph' from the pattern's ifm and alpha constant
+  luci::CirclePRelu *create_prelu(loco::Graph *graph);
+
+  const PReluPattern &_p;
+};
+
+luci::CirclePRelu *FusePRelu::create_prelu(loco::Graph *graph)
+{
+  assert(graph);
+
+  // The fused node is named after the Add it replaces
+  const auto fused_name = _p._add_ofm->name() + "_prelu";
+
+  auto fused = graph->nodes()->create<luci::CirclePRelu>();
+  fused->name(fused_name);
+  fused->alpha(_p._const_alpha);
+  fused->input(_p._ifm);
+  return fused;
+}
+
+void FusePRelu::apply()
+{
+  auto add = _p._add_ofm;
+  auto prelu = create_prelu(add->graph());
+
+  // set origin: the fused node inherits the origins of every node it replaces
+  std::vector<std::shared_ptr<luci::CircleNodeOrigin>> origins;
+  origins.push_back(luci::get_origin(_p._relu));
+  origins.push_back(luci::get_origin(_p._abs));
+  origins.push_back(luci::get_origin(_p._sub));
+  origins.push_back(luci::get_origin(_p._mul_alpha));
+  origins.push_back(luci::get_origin(_p._mul_half));
+  origins.push_back(luci::get_origin(_p._add_ofm));
+
+  luci::add_origin(prelu, luci::composite_origin(origins));
+
+  replace(add).with(prelu);
+}
+
+} // namespace
+
+namespace
+{
+
+// Fuse the PRelu pattern ending at 'add', if there is one.
+// Returns true when the graph was changed.
+bool fuse_prelu(luci::CircleAdd *add)
+{
+  assert(add);
+
+  PReluPattern pattern(add);
+  if (not pattern.matched())
+    return false;
+
+  FusePRelu fuser(pattern);
+  fuser.apply();
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FusePReluPass::run(loco::Graph *g)
+{
+  bool changed = false;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    // Only an Add can be the last node of the pattern
+    if (auto add = dynamic_cast<luci::CircleAdd *>(node))
+      changed = fuse_prelu(add) or changed;
+  }
+
+  return changed;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FusePReluPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+// Test helper owning the nodes of the PRelu pattern.
+// init() only creates and configures the nodes; wiring is left to the derived graph.
+class PReluGraphlet
+{
+public:
+  PReluGraphlet() = default;
+
+  void init(loco::Graph *g)
+  {
+    auto nodes = g->nodes();
+
+    _abs = nodes->create<luci::CircleAbs>();
+    _sub = nodes->create<luci::CircleSub>();
+    _mul_alpha = nodes->create<luci::CircleMul>();
+    _mul_half = nodes->create<luci::CircleMul>();
+    _relu = nodes->create<luci::CircleRelu>();
+    _add = nodes->create<luci::CircleAdd>();
+    _const_alpha = nodes->create<luci::CircleConst>();
+    _const_half = nodes->create<luci::CircleConst>();
+
+    // No fused activation anywhere in the pattern
+    for (auto mul : {_mul_alpha, _mul_half})
+      mul->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _sub->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+    _abs->name("abs");
+    _sub->name("sub");
+    _mul_alpha->name("mul_alpha");
+    _mul_half->name("mul_half");
+    _relu->name("relu");
+    _add->name("add");
+    _const_alpha->name("const_alpha");
+    _const_half->name("const_half");
+
+    // Both constants are FLOAT32 tensors of shape {1} holding a single value
+    auto fill_scalar = [](luci::CircleConst *target, float value) {
+      target->dtype(loco::DataType::FLOAT32);
+      target->size<loco::DataType::FLOAT32>(1);
+      target->shape({1});
+      target->at<loco::DataType::FLOAT32>(0) = value;
+      target->shape_status(luci::ShapeStatus::VALID);
+    };
+    fill_scalar(_const_alpha, 0.1);
+    fill_scalar(_const_half, 0.5);
+  }
+
+  // Overwrite the 0.5 constant with another value
+  void invalid_half() { _const_half->at<loco::DataType::FLOAT32>(0) = 0.1; }
+  // Give the final Add a fused activation
+  void invalid_act() { _add->fusedActivationFunction(luci::FusedActFunc::RELU); }
+
+protected:
+  luci::CircleAbs *_abs = nullptr;
+  luci::CircleSub *_sub = nullptr;
+  luci::CircleMul *_mul_alpha = nullptr;
+  luci::CircleMul *_mul_half = nullptr;
+  luci::CircleRelu *_relu = nullptr;
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleConst *_const_alpha = nullptr;
+  luci::CircleConst *_const_half = nullptr;
+};
+
+// Graph wired exactly as the PRelu pattern:
+// add(relu(in), mul_half(mul_alpha(sub(in, abs(in)), alpha), 0.5))
+class FusePReluTestGraph : public TestIOGraph, public PReluGraphlet
+{
+public:
+  FusePReluTestGraph() = default;
+
+  void init(void)
+  {
+    TestIOGraph::init({1}, {1});
+    PReluGraphlet::init(g());
+
+    // wired from the output side back towards the input
+    _add->x(_relu);
+    _add->y(_mul_half);
+    _mul_half->x(_mul_alpha);
+    _mul_half->y(_const_half);
+    _mul_alpha->x(_sub);
+    _mul_alpha->y(_const_alpha);
+    _sub->x(input());
+    _sub->y(_abs);
+    _abs->x(input());
+    _relu->features(input());
+
+    output()->from(_add);
+  }
+};
+
+// Same nodes as the PRelu pattern, but Sub computes abs(in) - in instead of in - abs(in)
+class FusePReluTestNegGraph : public TestIOGraph, public PReluGraphlet
+{
+public:
+  FusePReluTestNegGraph() = default;
+
+  void init(void)
+  {
+    TestIOGraph::init({1}, {1});
+    PReluGraphlet::init(g());
+
+    // wired from the output side back towards the input
+    _add->x(_relu);
+    _add->y(_mul_half);
+    _mul_half->x(_mul_alpha);
+    _mul_half->y(_const_half);
+    _mul_alpha->x(_sub);
+    _mul_alpha->y(_const_alpha);
+    // NOTE x and y are swapped on purpose
+    _sub->x(_abs);
+    _sub->y(input());
+    _abs->x(input());
+    _relu->features(input());
+
+    output()->from(_add);
+  }
+};
+
+} // namespace
+
+TEST(FusePReluPassTest, name)
+{
+  luci::FusePReluPass pass;
+
+  // The pass must report a non-null name
+  auto const pass_name = pass.name();
+  ASSERT_NE(nullptr, pass_name);
+}
+
+TEST(FusePReluPassTest, fuse)
+{
+  luci::FusePReluPass pass;
+  FusePReluTestGraph g;
+
+  g.init();
+
+  // The matching graph must be reported as changed
+  const bool changed = pass.run(g.g());
+  EXPECT_TRUE(changed);
+}
+
+TEST(FusePReluPassTest, fuse_invalid_half_NEG)
+{
+  // Start from the graph that matches the pattern, so that the altered 0.5 constant
+  // is the only reason for rejection. The Neg graph would already be rejected for
+  // its swapped Sub operands and would never reach the constant check.
+  FusePReluTestGraph g;
+  luci::FusePReluPass pass;
+
+  g.init();
+  g.invalid_half();
+
+  EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FusePReluPassTest, fuse_invalid_act_NEG)
+{
+  // Start from the graph that matches the pattern, so that the fused activation on
+  // Add is the only reason for rejection. The Neg graph would already be rejected
+  // for its swapped Sub operands and would never reach the activation check.
+  FusePReluTestGraph g;
+  luci::FusePReluPass pass;
+
+  g.init();
+  g.invalid_act();
+
+  EXPECT_FALSE(pass.run(g.g()));
+}
+
+TEST(FusePReluPassTest, fuse_NEG)
+{
+  luci::FusePReluPass pass;
+  FusePReluTestNegGraph g;
+
+  g.init();
+
+  // Sub operands are swapped in this graph, so nothing may be fused
+  const bool changed = pass.run(g.g());
+  EXPECT_FALSE(changed);
+}
node->dtype() == loco::DataType::S64); // bias (int16 quant)
}
+// Return true when the node's data type is FLOAT32
+bool is_fp32(const CircleNode *node)
+{
+  return loco::DataType::FLOAT32 == node->dtype();
+}
+
uint8_t fp32_to_uint8_cast(float f)
{
assert(std::numeric_limits<uint8_t>::min() <= f);
: scale_factor_from_max_side;
// protect scale from being very low to avoid overflow/underflow
- if (scaling_factor < 1e-9)
- scaling_factor = 1e-9;
+ if (scaling_factor < 1e-8)
+ scaling_factor = 1e-8;
zp = 0;
nudged_min = static_cast<float>(qmin_double * scaling_factor);
// Return true if the node is quantized
bool is_quantized(const CircleNode *node);
+// Return true if the node is fp32
+bool is_fp32(const CircleNode *node);
+
enum ActivationQType
{
MinMax, // Quantize using recorded min/max
LOGGER(l);
INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
- // Check if this is already quantized
- if (is_quantized(node))
- return;
-
- // Check if this is bool type (bool type is not quantized)
- if (node->dtype() == loco::DataType::BOOL)
+ // Check if node is fp32
+ if (not is_fp32(node))
return;
// Check if this is const (const activation is handled by QuantizeConstInputActivation)
{ \
auto input = node->INPUT_NAME(); \
auto const_node = dynamic_cast<luci::CircleConst *>(input); \
- if (const_node && !is_quantized(const_node)) \
+ if (const_node && is_fp32(const_node)) \
{ \
auto new_const = luci::clone(const_node); \
quant_const(new_const, _output_type); \
{ \
auto input1 = node->INPUT_NAME1(); \
auto const_node1 = dynamic_cast<luci::CircleConst *>(input1); \
- if (const_node1 && !is_quantized(const_node1)) \
+ if (const_node1 && is_fp32(const_node1)) \
{ \
auto new_const1 = luci::clone(const_node1); \
quant_const(new_const1, _output_type); \
} \
auto input2 = node->INPUT_NAME2(); \
auto const_node2 = dynamic_cast<luci::CircleConst *>(input2); \
- if (const_node2 && !is_quantized(const_node2)) \
+ if (const_node2 && is_fp32(const_node2)) \
{ \
auto new_const2 = luci::clone(const_node2); \
quant_const(new_const2, _output_type); \
}
// Ops that receive a single activation as an input
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleAbs, x)
QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMax, input)
QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMin, input)
QUANTIZE_SINGLE_CONST_INPUT(luci::CircleBatchToSpaceND, input)
{
auto input_node = node->inputs(i);
auto const_node = dynamic_cast<luci::CircleConst *>(input_node);
- if (const_node && !is_quantized(const_node))
+ if (const_node && is_fp32(const_node))
{
auto new_const = luci::clone(const_node);
quant_const(new_const, _output_type);
void visit(luci::CircleNode *node);
// Ops that receive a single activation as an input
+ void visit(luci::CircleAbs *node);
void visit(luci::CircleArgMax *node);
void visit(luci::CircleArgMin *node);
void visit(luci::CircleBatchToSpaceND *node);
}
}
+// TODO Reduce duplicate code with QuantizeDequantizeWeights
+/**
+ * @brief Quantize a FLOAT32 const node to S16 in place, using one scale per channel
+ *
+ * For every index i of min/max, compute_sym_scale_zp() is called to fill scaling_factor[i],
+ * zp[i], nudged_min[i] and nudged_max[i]. Each element is then clamped to the nudged range
+ * of its channel, multiplied by the inverse scale, rounded and finally limited to
+ * [-32767, 32767] when written back as S16.
+ *
+ * @note scaling_factor, zp, nudged_min and nudged_max are indexed up to min.size() without
+ *       being resized here, so the caller has to size them beforehand.
+ */
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
+                            std::vector<float> &scaling_factor, std::vector<int64_t> &zp,
+                            std::vector<float> &nudged_min, std::vector<float> &nudged_max,
+                            int32_t &channel_dim_index)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+
+  // Symmetric storage range: [-32767, 32767]
+  const int32_t upper = std::numeric_limits<int16_t>::max();
+  const int32_t lower = -upper;
+
+  const uint32_t num_elements = node->size<loco::DataType::FLOAT32>();
+
+  // Results are staged here because the node is re-typed only after all floats were read
+  std::vector<int32_t> staged(num_elements);
+
+  // Derive the quantization parameters of every channel from its min/max
+  for (size_t ch = 0; ch < min.size(); ++ch)
+  {
+    compute_sym_scale_zp(min[ch], max[ch], scaling_factor[ch], zp[ch], nudged_min[ch],
+                         nudged_max[ch]);
+  }
+
+  auto to_integer = [&](uint32_t *indices, loco::TensorShape &shape, int ch_dim) {
+    const int ch = indices[ch_dim];
+    const float inv_scale = 1.0 / scaling_factor[ch];
+    auto value = node->at<loco::DataType::FLOAT32>(cal_offset(shape, indices));
+    // Clamp to the nudged range of this channel
+    if (value < nudged_min[ch])
+      value = nudged_min[ch];
+    if (value > nudged_max[ch])
+      value = nudged_max[ch];
+    staged[cal_offset(shape, indices)] = static_cast<int32_t>(std::round(value * inv_scale));
+  };
+
+  iterate_per_channel(node, channel_dim_index, to_integer);
+
+  node->dtype(loco::DataType::S16);              // change the type of tensor
+  node->size<loco::DataType::S16>(num_elements); // resize tensor
+  for (uint32_t i = 0; i < num_elements; ++i)
+  {
+    const int32_t v = staged[i];
+    node->at<loco::DataType::S16>(i) = v < lower ? lower : (v > upper ? upper : v);
+  }
+}
+
+/**
+ * @brief Collect the smallest and largest FLOAT32 value of every channel of a const node
+ *
+ * The channel dimension is looked up with get_channel_dim_index(); min and max are resized
+ * to the extent of that dimension and filled while iterate_per_channel() walks the node.
+ *
+ * @throw std::runtime_error when get_channel_dim_index() reports failure
+ */
+void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
+                            int32_t &channel_dim_index)
+{
+  loco::TensorShape shape;
+  shape.rank(4);
+
+  if (!get_channel_dim_index(node, shape, channel_dim_index))
+  {
+    throw std::runtime_error("Failed to find channel index in " + node->name());
+  }
+
+  const auto num_channels = shape.dim(channel_dim_index).value();
+  min.resize(num_channels);
+  max.resize(num_channels);
+
+  // A channel has no valid min/max until its first element has been visited
+  std::vector<bool> visited(num_channels, false);
+
+  auto update_range = [&](uint32_t *indices, loco::TensorShape &dims, int ch_dim) {
+    const int ch = indices[ch_dim];
+    const auto value = node->at<loco::DataType::FLOAT32>(cal_offset(dims, indices));
+
+    if (not visited[ch])
+    {
+      // First element of this channel seeds both bounds
+      min[ch] = value;
+      max[ch] = value;
+      visited[ch] = true;
+      return;
+    }
+
+    if (value < min[ch])
+      min[ch] = value;
+    if (value > max[ch])
+      max[ch] = value;
+  };
+
+  iterate_per_channel(node, channel_dim_index, update_range);
+}
+
+/**
+ * @brief Quantize a FLOAT32 const node to U8 in place, using one scale/zero-point per channel
+ *
+ * For every index i of min/max, compute_asym_scale_zp() is called to fill scaling_factor[i],
+ * zp[i], nudged_min[i] and nudged_max[i]. Each element is clamped to the nudged range of its
+ * channel, shifted by the channel's nudged minimum, multiplied by the inverse scale, rounded
+ * and finally limited to [0, 255] when written back as U8.
+ *
+ * @note scaling_factor, zp, nudged_min and nudged_max are indexed up to min.size() without
+ *       being resized here, so the caller has to size them beforehand.
+ */
+void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
+                                   std::vector<float> &max, std::vector<float> &scaling_factor,
+                                   std::vector<int64_t> &zp, std::vector<float> &nudged_min,
+                                   std::vector<float> &nudged_max, int32_t &channel_dim_index)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+
+  // Storage range of the quantized values
+  const int32_t lower = 0;
+  const int32_t upper = 255;
+
+  const uint32_t num_elements = node->size<loco::DataType::FLOAT32>();
+
+  // Results are staged here because the node is re-typed only after all floats were read
+  std::vector<int32_t> staged(num_elements);
+
+  // Derive the quantization parameters of every channel from its min/max
+  for (size_t ch = 0; ch < min.size(); ++ch)
+  {
+    compute_asym_scale_zp(min[ch], max[ch], scaling_factor[ch], zp[ch], nudged_min[ch],
+                          nudged_max[ch]);
+  }
+
+  auto to_integer = [&](uint32_t *indices, loco::TensorShape &shape, int ch_dim) {
+    const int ch = indices[ch_dim];
+    const float inv_scale = 1.0 / scaling_factor[ch];
+    auto value = node->at<loco::DataType::FLOAT32>(cal_offset(shape, indices));
+    // Clamp to the nudged range of this channel
+    if (value < nudged_min[ch])
+      value = nudged_min[ch];
+    if (value > nudged_max[ch])
+      value = nudged_max[ch];
+    staged[cal_offset(shape, indices)] =
+      static_cast<int32_t>(std::round((value - nudged_min[ch]) * inv_scale));
+  };
+
+  iterate_per_channel(node, channel_dim_index, to_integer);
+
+  node->dtype(loco::DataType::U8);              // change the type of tensor
+  node->size<loco::DataType::U8>(num_elements); // resize tensor
+  for (uint32_t i = 0; i < num_elements; ++i)
+  {
+    const int32_t v = staged[i];
+    node->at<loco::DataType::U8>(i) = v < lower ? lower : (v > upper ? upper : v);
+  }
+}
+
void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
int32_t &channel_dim_index)
{
auto quantparam = weights->quantparam();
if (quantparam == nullptr)
{
- assert(false && "quantparam is nullptr");
+ // Find min/max on the fly
+ // NOTE This is for the case when QuantizeDequantizeWeights is skipped
+ // TODO Reduce duplicate code
+ std::vector<float> min;
+ std::vector<float> max;
+ int32_t channel_dim_index = 0;
+
+ cal_minmax_per_channel(weights, min, max, channel_dim_index);
+
+ std::vector<float> nudged_min(min.size());
+ std::vector<float> nudged_max(min.size());
+ std::vector<float> scaling_factor(min.size());
+ std::vector<int64_t> zp(min.size());
+
+ if (output_type == loco::DataType::U8)
+ {
+ asymmetric_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max,
+ channel_dim_index);
+ }
+ else
+ {
+ sym_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max,
+ channel_dim_index);
+ }
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale = scaling_factor;
+ quantparam->zerop = zp;
+ quantparam->quantized_dimension = channel_dim_index;
+ weights->quantparam(std::move(quantparam));
+
return;
}
// Find min/max per layer-wise
else
{
- // Quantize using recorded quantparam
auto quantparam = weights->quantparam();
+ if (quantparam == nullptr)
+ {
+ // Find min/max on the fly
+ // NOTE This is for the case when QuantizeDequantizeWeights is skipped
+ // TODO Reduce duplicate code
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::lowest();
+ for (uint32_t i = 0; i < weights->size<loco::DataType::FLOAT32>(); i++)
+ {
+ auto data = weights->at<loco::DataType::FLOAT32>(i);
+ min = data < min ? data : min;
+ max = data > max ? data : max;
+ }
+ float scaling_factor{0};
+ int64_t zp{0};
+ float nudged_min{0};
+ float nudged_max{0};
+
+ asymmetric_wquant_with_minmax_per_layer(weights, min, max, scaling_factor, zp, nudged_min,
+ nudged_max);
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ weights->quantparam(std::move(quantparam));
+ return;
+ }
+
+ // Quantize using recorded quantparam
assert(quantparam != nullptr);
assert(quantparam->min.size() == 1); // only support layer-wise quant
assert(quantparam->scale.size() == 1); // only support layer-wise quant
#include <luci/Log.h>
#include <logo/Phase.h>
-#include <oops/UserExn.h>
-
#include <iostream>
#include <cmath>
* 2. After output feature map
*
* For example, if default_dtype = U8 and op_dtype = S16,
- * 1. Quantize Op for U8->S16 is inserted before ifm
- * 2. Quantize Op for S16->U8 is inserted after ofm
+ * 1. Quantize (U8->S16) is inserted before ifm
+ * 2. Quantize (S16->U8) is inserted after ofm
*
* Why not insert Quantize Op for const ifm?
* We quantize const tensor at once to preserve precision.
if (input->opcode() == luci::CircleOpcode::CIRCLECONST)
return nullptr;
+ // input is not quantizable (ex: index)
+ if (input->quantparam() == nullptr)
+ return nullptr;
+
auto input_quant = create_quantize_op(input, _op_dtype);
input_quant->input(input);
auto origin_node = loco::must_cast<luci::CircleNode *>(origin);
{
auto output = loco::must_cast<luci::CircleNode *>(node);
assert(output->opcode() != luci::CircleOpcode::CIRCLECONST); // FIX_CALLER_UNLESS
+
+ // output is not quantizable (ex: index)
+ if (output->quantparam() == nullptr)
+ return;
+
auto output_quant = create_quantize_op(output, _default_dtype);
luci::add_origin(output_quant, luci::get_origin(output));
void visit(luci::CircleUnpackOut *) {}
// Ops that receive a single activation as an input
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAbs, x)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAveragePool2D, value)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleBatchToSpaceND, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleConv2D, input)
void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
{
auto inputs = g->inputs();
- for (auto node : loco::input_nodes(g))
+
+ assert(inputs); // FIX_CALLER_UNLESS
+ assert(inputs->size() == _ctx->input_types.size()); // FIX_CALLER_UNLESS
+
+ // NOTE loco::input_nodes returns input nodes following the order of InputIndex
+ auto input_nodes = loco::input_nodes(g);
+ for (uint32_t i = 0; i < input_nodes.size(); i++)
{
- auto input = loco::must_cast<luci::CircleInput *>(node);
- if (input->dtype() == _ctx->input_type)
+ auto input = loco::must_cast<luci::CircleInput *>(input_nodes[i]);
+ assert(i == input->index()); // Fix input_type logic
+
+ const auto user_given_dtype = _ctx->input_types[i];
+
+ if (input->dtype() == user_given_dtype)
continue;
// Bool type is not quantizable
// Update qparam of input
// This step is skipped if input_type is float32
- if (_ctx->input_type != loco::DataType::FLOAT32)
+ if (user_given_dtype != loco::DataType::FLOAT32)
{
auto quantparam = input->quantparam();
assert(quantparam);
float nudged_min{0};
float nudged_max{0};
- if (_ctx->input_type == loco::DataType::U8)
+ if (user_given_dtype == loco::DataType::U8)
{
compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
}
else
{
- assert(_ctx->input_type == loco::DataType::S16);
+ assert(user_given_dtype == loco::DataType::S16);
compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
}
input->quantparam()->scale[0] = scaling_factor;
}
// Update dtype of input
- input->dtype(_ctx->input_type);
+ input->dtype(user_given_dtype);
auto graph_input = inputs->at(input->index());
- graph_input->dtype(_ctx->input_type);
+ graph_input->dtype(user_given_dtype);
}
}
void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
{
auto outputs = g->outputs();
- for (auto node : loco::output_nodes(g))
+ assert(outputs); // FIX_CALLER_UNLESS
+ assert(outputs->size() == _ctx->output_types.size()); // Fix CircleQuantizer unless
+
+ // NOTE loco::output_nodes returns output nodes following the order of OutputIndex
+ auto output_nodes = loco::output_nodes(g);
+ for (uint32_t i = 0; i < output_nodes.size(); i++)
{
- auto output = loco::must_cast<luci::CircleOutput *>(node);
- if (output->dtype() == _ctx->output_type)
+ auto output = loco::must_cast<luci::CircleOutput *>(output_nodes[i]);
+ assert(i == output->index()); // Fix output_type logic
+
+ const auto user_given_dtype = _ctx->output_types[i];
+
+ if (output->dtype() == user_given_dtype)
continue;
// Bool type is not quantizable
auto from = loco::must_cast<luci::CircleNode *>(output->from());
- // The last Op is not quantizable Op (ex: ArgMax)
+ // The last Op is not quantizable (ex: ArgMax)
if (not from->quantparam())
continue;
// Insert Dequantize Op for float32 output_type
- if (_ctx->output_type == loco::DataType::FLOAT32)
+ if (user_given_dtype == loco::DataType::FLOAT32)
{
auto dequant_op = create_dequantize(from);
loco::replace(from).with(dequant_op);
else
{
// Insert Quantize Op for non-float32 output_type
- auto quant_op = create_quantize_op(from, _ctx->output_type);
+ auto quant_op = create_quantize_op(from, user_given_dtype);
loco::replace(from).with(quant_op);
quant_op->input(from);
}
// Update dtype of output
- output->dtype(_ctx->output_type);
+ output->dtype(user_given_dtype);
auto graph_output = outputs->at(output->index());
- graph_output->dtype(_ctx->output_type);
+ graph_output->dtype(user_given_dtype);
}
}
* Weights is quantized using min/max of its value
*
* Bias is quantized using input scale (s_i) and weights scale (s_w)
- * - Activation and weights should be quantized earlier than bias
+ * - Therefore, activation and weights should be quantized earlier than bias
*
- * Quantization Steps
+ * Overall Quantization Steps
* 1. Quantize Activation
* - Quantize using recorded min/max (QuantizeActivation)
* - Insert Quantize Ops for mixed-precision quantization (InsertQuantizeOp)
};
// Quantize activation
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ // Why all_nodes?
+ // Models can have inactive (unused) inputs.
+ // We do not reject such models, but quantize them too
+ for (auto node : loco::all_nodes(g))
{
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
QuantizeActivation qa(_ctx->input_model_dtype, quantize_dtype(circle_node));
TEST(QuantizeWithMinMaxPassTest, name)
{
- luci::QuantizeWithMinMaxPass pass(loco::DataType::FLOAT32, loco::DataType::U8,
- luci::QuantizationGranularity::LayerWise);
+ auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+ {
+ ctx->input_model_dtype = loco::DataType::FLOAT32;
+ ctx->output_model_dtype = loco::DataType::U8;
+ ctx->granularity = luci::QuantizationGranularity::LayerWise;
+ }
+
+ luci::QuantizeWithMinMaxPass pass(std::move(ctx));
auto const name = pass.name();
ASSERT_NE(nullptr, name);
}
{
SimpleConcatGraph g(loco::DataType::S32);
- luci::QuantizeWithMinMaxPass qwmm(loco::DataType::FLOAT32, loco::DataType::U8,
- luci::QuantizationGranularity::LayerWise);
+ auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+ {
+ ctx->input_model_dtype = loco::DataType::FLOAT32;
+ ctx->output_model_dtype = loco::DataType::U8;
+ ctx->granularity = luci::QuantizationGranularity::LayerWise;
+ }
+
+ luci::QuantizeWithMinMaxPass qwmm(std::move(ctx));
qwmm.run(&g.g);
EXPECT_EQ(nullptr, g.input_1->quantparam());
EXPECT_EQ(nullptr, g.input_2->quantparam());
}
+
+// The pass is expected not to throw when the graph holds an input node nobody consumes
+TEST(QuantizeWithMinMaxPassTest, inactive_input)
+{
+  SimpleConcatGraph g(loco::DataType::FLOAT32);
+
+  // Extra input node that is not connected to anything
+  g.g.nodes()->create<luci::CircleInput>();
+
+  auto context = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+  context->granularity = luci::QuantizationGranularity::LayerWise;
+  context->output_model_dtype = loco::DataType::U8;
+  context->input_model_dtype = loco::DataType::FLOAT32;
+
+  luci::QuantizeWithMinMaxPass qwmm(std::move(context));
+
+  EXPECT_NO_THROW(qwmm.run(&g.g));
+}
{
loco::DataType output_model_dtype = loco::DataType::Unknown;
QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
- loco::DataType input_type = loco::DataType::Unknown;
- loco::DataType output_type = loco::DataType::Unknown;
+ std::vector<loco::DataType> input_types;
+ std::vector<loco::DataType> output_types;
bool TF_style_maxpool = false;
std::vector<LayerInfo> layers_info;
};
public:
- QuantizedModelVerifier(loco::DataType quantized_dtype, QuantizationGranularity granularity)
- {
- _ctx = std::make_unique<Context>();
- {
- _ctx->output_model_dtype = quantized_dtype;
- _ctx->granularity = granularity;
- _ctx->input_type = quantized_dtype;
- _ctx->output_type = quantized_dtype;
- _ctx->TF_style_maxpool = false;
- }
- }
-
-public:
QuantizedModelVerifier(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
{
// DO NOTHING
#include "luci/Pass/QuantizeWithMinMaxPass.h"
#include "luci/Pass/QuantizationParameters.h"
+#include "luci/Pass/CircleTypeInferencePass.h"
+#include <logo/Phase.h>
#include <luci/test/TestIOGraph.h>
#include <gtest/gtest.h>
qparam->zerop.push_back(zp);
}
+// Run CircleTypeInferencePass followed by QuantizeWithMinMaxPass (configured by ctx) on g.
+// Ownership of ctx is handed over to the quantization pass.
+void run_phase(loco::Graph *g, std::unique_ptr<luci::QuantizeWithMinMaxPass::Context> &&ctx)
+{
+  logo::Phase phase;
+
+  // Default passes.
+  phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+  phase.emplace_back(std::make_unique<luci::QuantizeWithMinMaxPass>(std::move(ctx)));
+
+  logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+  phase_runner.run(phase);
+}
+
+// Convenience overload: build a context for a FLOAT32 model quantized to quantized_dtype
+// with the given granularity, then delegate to the context-taking overload above so the
+// phase setup lives in a single place.
+void run_phase(loco::Graph *g, Type quantized_dtype, Granularity granularity)
+{
+  auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+  {
+    ctx->input_model_dtype = loco::DataType::FLOAT32;
+    ctx->output_model_dtype = quantized_dtype;
+    ctx->granularity = granularity;
+    // Test graph has only one input/output
+    ctx->input_types = {quantized_dtype};
+    ctx->output_types = {quantized_dtype};
+  }
+
+  run_phase(g, std::move(ctx));
+}
+
+
void quantize_and_verify(loco::Graph *g, Type quantized_dtype, Granularity granularity)
{
- luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
- pass.run(g);
+ run_phase(g, quantized_dtype, granularity);
- luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+ {
+ ctx->output_model_dtype = quantized_dtype;
+ ctx->granularity = granularity;
+ // Test graph has only one input/output
+ ctx->input_types = {quantized_dtype};
+ ctx->output_types = {quantized_dtype};
+ }
+
+ luci::QuantizedModelVerifier verifier(std::move(ctx));
verifier.verify(g);
}
ctx->input_model_dtype = Type::FLOAT32;
ctx->output_model_dtype = quantized_dtype;
ctx->granularity = granularity;
- ctx->input_type = quantized_dtype;
- ctx->output_type = quantized_dtype;
+ // Test graph has only one input/output
+ ctx->input_types = {quantized_dtype};
+ ctx->output_types = {quantized_dtype};
ctx->TF_style_maxpool = false;
ctx->layers_info.push_back(info);
}
- luci::QuantizeWithMinMaxPass pass(std::move(ctx));
- pass.run(g);
+ run_phase(g, std::move(ctx));
}
// Do verification
{
ctx->output_model_dtype = quantized_dtype;
ctx->granularity = granularity;
- ctx->input_type = quantized_dtype;
- ctx->output_type = quantized_dtype;
+ ctx->input_types = {quantized_dtype};
+ ctx->output_types = {quantized_dtype};
ctx->TF_style_maxpool = false;
ctx->layers_info.push_back(info);
}
void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype,
Granularity granularity, Type wrong_dtype)
{
- luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
- pass.run(g->g());
+ run_phase(g->g(), quantized_dtype, granularity);
auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
node->dtype(wrong_dtype);
- luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+ {
+ ctx->output_model_dtype = quantized_dtype;
+ ctx->granularity = granularity;
+ // Test graph has only one input/output
+ ctx->input_types = {quantized_dtype};
+ ctx->output_types = {quantized_dtype};
+ }
+
+ luci::QuantizedModelVerifier verifier(std::move(ctx));
verifier.verify(g->g());
}
void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype,
Granularity granularity)
{
- luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
- pass.run(g->g());
+ run_phase(g->g(), quantized_dtype, granularity);
auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
insert_scale_zp(node, 1.0, 1);
- luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+ {
+ ctx->output_model_dtype = quantized_dtype;
+ ctx->granularity = granularity;
+ // Test graph has only one input/output
+ ctx->input_types = {quantized_dtype};
+ ctx->output_types = {quantized_dtype};
+ }
+
+ luci::QuantizedModelVerifier verifier(std::move(ctx));
verifier.verify(g->g());
}
virtual void init(void) = 0;
};
+// TestIOGraph variant that applies one caller-chosen dtype to its input and output
+class TypedTestGraph : public luci::test::TestIOGraph
+{
+protected:
+  // Build the I/O skeleton with the given shapes, then set dtype T on the graph-level
+  // input/output entries at index 0 and on the input/output nodes
+  void init(Type T, const luci::test::ShapeU32 shape_in, const luci::test::ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+
+    auto graph = g();
+    graph->inputs()->at(0)->dtype(T);
+    graph->outputs()->at(0)->dtype(T);
+
+    input()->dtype(T);
+    output()->dtype(T);
+  }
+
+public:
+  // Concrete test graphs create their operators here
+  virtual void init(void) = 0;
+};
+
+
class InstanceNormTestGraph final : public SimpleTestGraph
{
public:
output()->from(_argmax);
set_minmax_to_non_const(g(), -1, 1);
+
+ // Sync output dtype with graph's output dtype
+ g()->outputs()->at(0)->dtype(output()->dtype());
}
public:
output()->from(_op);
set_minmax_to_non_const(g(), -1, 1);
+
+ // Sync output dtype with graph's output dtype
+ g()->outputs()->at(0)->dtype(output()->dtype());
}
loco::Node *x(void) const { return _op->x(); }
output()->from(_op);
set_minmax_to_non_const(g(), -1, 1);
+
+ // Sync output dtype with graph's output dtype
+ g()->outputs()->at(0)->dtype(output()->dtype());
}
loco::Node *x(void) const { return _op->x(); }
luci::CircleConst *_const = nullptr;
};
+// Graph: input(T, [32]) * const(T, [32]) -> output(T, [32]), with T given as template argument
+template <Type T> class IntMulTestGraph final : public TypedTestGraph
+{
+public:
+  void init(void) override
+  {
+    TypedTestGraph::init(T, {32}, {32});
+
+    _const = create_dummy_const<T>(g(), {32});
+
+    // Configure the node through a local handle before publishing it as a member
+    auto mul = g()->nodes()->create<luci::CircleMul>();
+    mul->name("test");
+    mul->dtype(T);
+    mul->fusedActivationFunction(luci::FusedActFunc::NONE);
+    mul->x(input());
+    mul->y(_const);
+    _mul = mul;
+
+    output()->from(_mul);
+  }
+
+  // Operands of the Mul node
+  loco::Node *x() { return _mul->x(); }
+  loco::Node *y() { return _mul->y(); }
+
+private:
+  luci::CircleMul *_mul = nullptr;
+  luci::CircleConst *_const = nullptr;
+};
+
+
class AddTestGraph final : public SimpleTestGraph
{
public:
luci::CircleConst *_const = nullptr;
};
+// Graph: input(T, [32]) + const(T, [32]) -> output(T, [32]), with T given as template argument
+template <Type T> class IntAddTestGraph final : public TypedTestGraph
+{
+public:
+  void init(void) override
+  {
+    TypedTestGraph::init(T, {32}, {32});
+
+    _const = create_dummy_const<T>(g(), {32});
+
+    // Configure the node through a local handle before publishing it as a member
+    auto add = g()->nodes()->create<luci::CircleAdd>();
+    add->name("test");
+    add->dtype(T);
+    add->fusedActivationFunction(luci::FusedActFunc::NONE);
+    add->x(input());
+    add->y(_const);
+    _add = add;
+
+    output()->from(_add);
+  }
+
+  // Operands of the Add node
+  loco::Node *x() { return _add->x(); }
+  loco::Node *y() { return _add->y(); }
+
+private:
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleConst *_const = nullptr;
+};
+
+
} // namespace
// Quantize and verify with given configurations
// Quantize and verify with wrong type
// Users can specify the test target
-#define TEST_WITH_WRONG_TYPE_TARGET(graph, type, granularity, wrong_dtype, target) \
- do \
- { \
- graph g; \
- g.init(); \
- auto node = loco::must_cast<luci::CircleNode *>(target); \
- luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, type, granularity); \
- pass.run(g.g()); \
- auto after_node = loco::must_cast<luci::CircleNode *>(target); \
- after_node->dtype(wrong_dtype); \
- luci::QuantizedModelVerifier verifier(type, granularity); \
- EXPECT_ANY_THROW(verifier.verify(g.g())); \
+#define TEST_WITH_WRONG_TYPE_TARGET(graph, type, granularity_, wrong_dtype, target) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ auto node = loco::must_cast<luci::CircleNode *>(target); \
+ run_phase(g.g(), type, granularity_); \
+ auto after_node = loco::must_cast<luci::CircleNode *>(target); \
+ after_node->dtype(wrong_dtype); \
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>(); \
+ { \
+ ctx->output_model_dtype = type; \
+ ctx->granularity = granularity_; \
+ ctx->input_types = {type}; \
+ ctx->output_types = {type}; \
+ } \
+ luci::QuantizedModelVerifier verifier(std::move(ctx)); \
+ EXPECT_ANY_THROW(verifier.verify(g.g())); \
} while (0)
// Quantize and verify with wrong granularity
// Users can specify the test target
-#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity, target) \
- do \
- { \
- graph g; \
- g.init(); \
- auto node = loco::must_cast<luci::CircleNode *>(target); \
- luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, type, granularity); \
- pass.run(g.g()); \
- auto after_node = loco::must_cast<luci::CircleNode *>(target); \
- insert_scale_zp(after_node, 1.0, 1); \
- luci::QuantizedModelVerifier verifier(type, granularity); \
- EXPECT_ANY_THROW(verifier.verify(g.g())); \
+#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity_, target) \
+ do \
+ { \
+ graph g; \
+ g.init(); \
+ auto node = loco::must_cast<luci::CircleNode *>(target); \
+ run_phase(g.g(), type, granularity_); \
+ auto after_node = loco::must_cast<luci::CircleNode *>(target); \
+ insert_scale_zp(after_node, 1.0, 1); \
+ auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>(); \
+ { \
+ ctx->output_model_dtype = type; \
+ ctx->granularity = granularity_; \
+ ctx->input_types = {type}; \
+ ctx->output_types = {type}; \
+ } \
+ luci::QuantizedModelVerifier verifier(std::move(ctx)); \
+ EXPECT_ANY_THROW(verifier.verify(g.g())); \
} while (0)
// Test a local helper function
SUCCEED();
}
+TEST(QuantizedModelVerifierTest, Add_inttype) // Add graph whose input and const are integer-typed
+{
+ // S32 input/const (IntAddTestGraph<S32>): U8 layer-wise, U8 channel-wise, S16 channel-wise
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ // S64 input/const (IntAddTestGraph<S64>): same three dtype/granularity combinations
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(IntAddTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(IntAddTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ SUCCEED();
+}
+
TEST(QuantizedModelVerifierTest, Mul)
{
TEST_WITH_GRAPH(MulTestGraph, Type::U8, Granularity::LayerWise);
SUCCEED();
}
+TEST(QuantizedModelVerifierTest, Mul_inttype) // Mul graph whose input and const are integer-typed
+{
+ // S32 input/const (IntMulTestGraph<S32>): U8 layer-wise, U8 channel-wise, S16 channel-wise
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+ // S64 input/const (IntMulTestGraph<S64>): same three dtype/granularity combinations
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(IntMulTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(IntMulTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+ SUCCEED();
+}
+
// TODO Add following testcases
//
// CircleConv2D
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveDuplicateConstPass.h"
+
+#include <luci/Log.h>
+
+namespace
+{
+
+// Returns true when both constants carry equivalent quantization parameters:
+// either the very same parameter object (this includes both being absent), or
+// two objects whose scale, quantized_dimension, zerop, min and max all match.
+bool compare_quant_params(luci::CircleConst *left, luci::CircleConst *right)
+{
+  const auto lhs_qparam = left->quantparam();
+  const auto rhs_qparam = right->quantparam();
+
+  // Same pointer (or both null): nothing more to compare
+  if (lhs_qparam == rhs_qparam)
+    return true;
+
+  // Exactly one side has quantization parameters
+  if (lhs_qparam == nullptr or rhs_qparam == nullptr)
+    return false;
+
+  return lhs_qparam->scale == rhs_qparam->scale and
+         lhs_qparam->quantized_dimension == rhs_qparam->quantized_dimension and
+         lhs_qparam->zerop == rhs_qparam->zerop and lhs_qparam->min == rhs_qparam->min and
+         lhs_qparam->max == rhs_qparam->max;
+}
+
+// Returns true when both constants have the same rank and the same value in
+// every dimension.
+bool compare_dim_values(luci::CircleConst *left, luci::CircleConst *right)
+{
+  const auto rank = left->rank();
+
+  if (right->rank() != rank)
+    return false;
+
+  // Advance while the axes agree; stop at the first mismatch
+  uint32_t axis = 0;
+  while (axis < rank and left->dim(axis).value() == right->dim(axis).value())
+    ++axis;
+
+  // Every axis matched only if the scan reached the end
+  return axis == rank;
+}
+
+/**
+ * @brief Check whether two constants of data type DT hold the same content
+ *
+ * Two constants are treated as equal when their quantization parameters, shape
+ * (rank and every dimension value), element count and every element match.
+ *
+ * @note Both constants are accessed as DT; the caller in this file dispatches
+ *       on dtype() after checking that both nodes have the same dtype.
+ */
+template <loco::DataType DT> bool is_equal_consts(luci::CircleConst *left, luci::CircleConst *right)
+{
+  if (not compare_quant_params(left, right))
+    return false;
+
+  if (not compare_dim_values(left, right))
+    return false;
+
+  // The element count is set separately from the shape (size<DT>(n) vs dim()),
+  // so equal dims do not guarantee equal sizes. Compare it explicitly to avoid
+  // reading past the end of 'right' in the loop below.
+  const auto num_elements = left->size<DT>();
+  if (num_elements != right->size<DT>())
+    return false;
+
+  for (uint32_t i = 0; i < num_elements; ++i)
+  {
+    if (left->at<DT>(i) != right->at<DT>(i))
+      return false;
+  }
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+// Walks every bucket of _sum_to_const and, inside each bucket, replaces every
+// constant that is equal to an earlier "reference" constant with that reference.
+// Returns true when at least one constant was replaced.
+bool RemoveDuplicateConstPass::remove_duplicate_const()
+{
+ bool changed = false;
+
+ for (auto &cur_pair : _sum_to_const)
+ {
+ // if single const - continue
+ if (cur_pair.second.size() == 1)
+ continue;
+
+ // Each still-present entry of the bucket is tried as the reference in turn
+ for (auto reference_const : cur_pair.second)
+ {
+ // Entries set to nullptr below were already replaced; skip them
+ if (reference_const == nullptr)
+ continue;
+
+ // Index loop (not range-for) because matching entries are overwritten in place
+ for (uint32_t i = 0; i < cur_pair.second.size(); ++i)
+ {
+ auto cur_const = cur_pair.second.at(i);
+ if (cur_const == nullptr or cur_const == reference_const)
+ continue;
+
+ if (cur_const->dtype() != reference_const->dtype())
+ continue;
+
+ bool is_equal = false;
+
+ // Dispatch to the comparison instantiated for the shared dtype
+ switch (cur_const->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ is_equal = is_equal_consts<loco::DataType::FLOAT32>(reference_const, cur_const);
+ break;
+ case loco::DataType::S32:
+ is_equal = is_equal_consts<loco::DataType::S32>(reference_const, cur_const);
+ break;
+ case loco::DataType::S16:
+ is_equal = is_equal_consts<loco::DataType::S16>(reference_const, cur_const);
+ break;
+ case loco::DataType::S8:
+ is_equal = is_equal_consts<loco::DataType::S8>(reference_const, cur_const);
+ break;
+ case loco::DataType::U8:
+ is_equal = is_equal_consts<loco::DataType::U8>(reference_const, cur_const);
+ break;
+ default:
+ // Other dtypes are not compared; this 'continue' moves on to the next index i
+ continue;
+ }
+
+ if (not is_equal)
+ continue;
+
+ // Redirect every use of the duplicate to the reference constant
+ loco::replace(cur_const).with(reference_const);
+
+ // Remove from next checking
+ cur_pair.second[i] = nullptr;
+
+ changed = true;
+ }
+ }
+ }
+
+ return changed;
+}
+
+/**
+ * Register const_node in _sum_to_const, keyed by the float sum of all of its
+ * elements (read as DT). remove_duplicate_const() later compares only the
+ * nodes that ended up under the same key.
+ */
+template <loco::DataType DT>
+void RemoveDuplicateConstPass::add_to_map(luci::CircleConst *const_node)
+{
+  float sum = 0.0;
+
+  const auto num_elements = const_node->size<DT>();
+  for (uint32_t idx = 0; idx != num_elements; ++idx)
+    sum += const_node->at<DT>(idx);
+
+  // operator[] creates an empty bucket the first time this sum is seen
+  _sum_to_const[sum].push_back(const_node);
+}
+
+/**
+ * Remove duplicate Const nodes.
+ *
+ * BEFORE
+ * [CircleNode] [CircleConst]
+ * | /
+ * | /
+ * [CircleNode] [CircleConst]
+ * | /
+ * | /
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode] [CircleConst]
+ * | / /
+ * | / /
+ * [CircleNode] /
+ * | /
+ * | /
+ * [CircleNode]
+ *
+ */
+bool RemoveDuplicateConstPass::run(loco::Graph *g)
+{
+  // The same pass object can be run repeatedly (e.g. until it reports no change).
+  // Drop buckets collected by a previous run: they may hold entries that were
+  // already replaced, or pointers to nodes that no longer exist in the graph.
+  _sum_to_const.clear();
+
+  // Collect every active CircleConst of a supported dtype into _sum_to_const
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto const_node = dynamic_cast<luci::CircleConst *>(node);
+    if (const_node == nullptr)
+      continue;
+
+    switch (const_node->dtype())
+    {
+      case loco::DataType::FLOAT32:
+        add_to_map<loco::DataType::FLOAT32>(const_node);
+        break;
+      case loco::DataType::S32:
+        add_to_map<loco::DataType::S32>(const_node);
+        break;
+      case loco::DataType::S16:
+        add_to_map<loco::DataType::S16>(const_node);
+        break;
+      case loco::DataType::S8:
+        add_to_map<loco::DataType::S8>(const_node);
+        break;
+      case loco::DataType::U8:
+        add_to_map<loco::DataType::U8>(const_node);
+        break;
+      default:
+        // Constants of other dtypes are left untouched
+        continue;
+    }
+  }
+
+  // Returns true when at least one duplicate was replaced
+  return remove_duplicate_const();
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveDuplicateConstPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/test/TestIOGraph.h>
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace luci::test;
+
+// Graphlet holding two Reshape nodes and the two S32 shape constants they use.
+// With is_duplicate == true both constants are identical ([5]); otherwise the
+// second constant differs in shape and content ([1, 5]).
+class DuplicateConstsGraphlet
+{
+public:
+  DuplicateConstsGraphlet() = default;
+
+public:
+  void init(loco::Graph *g, bool is_duplicate)
+  {
+    // First shape constant: rank-1 tensor with the single value 5
+    _reshape_shape = g->nodes()->create<luci::CircleConst>();
+    _reshape_shape->rank(1);
+    _reshape_shape->dim(0).set(1);
+    _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+    _reshape_shape->dtype(loco::DataType::S32);
+
+    _reshape_shape->size<loco::DataType::S32>(1);
+    _reshape_shape->at<loco::DataType::S32>(0) = 5;
+    _reshape_shape->name("reshape_shape_1");
+
+    // Second shape constant: copy of the first one, or a 2-element tensor {1, 5}
+    _reshape_shape_duplicate = g->nodes()->create<luci::CircleConst>();
+    _reshape_shape_duplicate->rank(1);
+    // Keep the declared dimension consistent with the number of elements set below
+    _reshape_shape_duplicate->dim(0).set(is_duplicate ? 1 : 2);
+    _reshape_shape_duplicate->shape_status(luci::ShapeStatus::VALID);
+    _reshape_shape_duplicate->dtype(loco::DataType::S32);
+    if (is_duplicate)
+    {
+      _reshape_shape_duplicate->size<loco::DataType::S32>(1);
+      _reshape_shape_duplicate->at<loco::DataType::S32>(0) = 5;
+    }
+    else
+    {
+      _reshape_shape_duplicate->size<loco::DataType::S32>(2);
+      _reshape_shape_duplicate->at<loco::DataType::S32>(0) = 1;
+      _reshape_shape_duplicate->at<loco::DataType::S32>(1) = 5;
+    }
+    _reshape_shape_duplicate->name("reshape_shape_2");
+
+    // First Reshape: always reshapes to [5]
+    _reshape_f = g->nodes()->create<luci::CircleReshape>();
+    _reshape_f->newShape()->rank(1);
+    _reshape_f->newShape()->dim(0) = 5;
+    _reshape_f->name("reshape_f");
+
+    // Second Reshape: newShape mirrors the content of the second constant
+    _reshape_s = g->nodes()->create<luci::CircleReshape>();
+    if (is_duplicate)
+    {
+      _reshape_s->newShape()->rank(1);
+      _reshape_s->newShape()->dim(0) = 5;
+    }
+    else
+    {
+      _reshape_s->newShape()->rank(2);
+      _reshape_s->newShape()->dim(0) = 1;
+      _reshape_s->newShape()->dim(1) = 5;
+    }
+    _reshape_s->name("reshape_s");
+  }
+
+protected:
+  luci::CircleReshape *_reshape_f = nullptr;
+  luci::CircleReshape *_reshape_s = nullptr;
+  luci::CircleConst *_reshape_shape = nullptr;
+  luci::CircleConst *_reshape_shape_duplicate = nullptr;
+};
+
+// Test graph: input -> reshape_f -> reshape_s -> output, where each Reshape
+// takes its shape from one of the two constants created by the graphlet.
+class DuplicateConstsGraph : public TestIOGraph, public DuplicateConstsGraphlet
+{
+public:
+  DuplicateConstsGraph() = default;
+
+public:
+  void init(const ShapeU32 in_shape, const ShapeU32 out_shape, bool is_duplicate)
+  {
+    // Create input/output first, then the Reshape nodes and their constants
+    TestIOGraph::init(in_shape, out_shape);
+    DuplicateConstsGraphlet::init(g(), is_duplicate);
+
+    // Attach the shape constants
+    _reshape_f->shape(_reshape_shape);
+    _reshape_s->shape(_reshape_shape_duplicate);
+
+    // Chain the data path: input -> reshape_f -> reshape_s -> output
+    _reshape_f->tensor(input());
+    _reshape_s->tensor(_reshape_f);
+    output()->from(_reshape_s);
+  }
+};
+} // namespace
+
+// The pass must report a non-null name
+TEST(RemoveDuplicateConstPass, name)
+{
+  luci::RemoveDuplicateConstPass pass;
+
+  auto const pass_name = pass.name();
+
+  ASSERT_NE(nullptr, pass_name);
+}
+
+// Two identical shape constants must be merged into a single active constant
+TEST(RemoveDuplicateConstPass, remove_duplicate)
+{
+  DuplicateConstsGraph g;
+  g.init({1, 5}, {5}, true);
+
+  // Run the pass until it reports no further change
+  luci::RemoveDuplicateConstPass pass;
+  while (pass.run(g.g()))
+  {
+    // repeat
+  }
+
+  // Count the constants still reachable from the graph outputs
+  uint32_t const_num = 0;
+  for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+  {
+    if (dynamic_cast<luci::CircleConst *>(node) == nullptr)
+      continue;
+
+    const_num++;
+  }
+
+  ASSERT_EQ(const_num, 1);
+}
+
+// Two different shape constants must both survive the pass
+TEST(RemoveDuplicateConstPass, remove_duplicate_NEG)
+{
+  DuplicateConstsGraph g;
+  g.init({1, 5}, {1, 5}, false);
+
+  // Run the pass until it reports no further change
+  luci::RemoveDuplicateConstPass pass;
+  while (pass.run(g.g()))
+  {
+    // repeat
+  }
+
+  // Count the constants still reachable from the graph outputs
+  uint32_t const_num = 0;
+  for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+  {
+    if (dynamic_cast<luci::CircleConst *>(node) == nullptr)
+      continue;
+
+    const_num++;
+  }
+
+  ASSERT_EQ(const_num, 2);
+}
}
}
+// Build a CircleReshape, together with its S32 shape CircleConst, that has
+// - the same dtype as node
+// - a shape constant holding the dimensions of node
+// NOTE: the caller has to connect the input(tensor) of the returned Op.
+luci::CircleReshape *create_reshape(luci::CircleFullyConnected *node)
+{
+  assert(node); // FIX_CALLER_UNLESS
+
+  auto graph = node->graph();
+  const auto rank = node->rank();
+
+  // Reshape node itself
+  auto reshape_node = graph->nodes()->create<luci::CircleReshape>();
+  reshape_node->name(node->name() + "/reshape");
+  reshape_node->dtype(node->dtype());
+  luci::add_origin(reshape_node, luci::get_origin(node));
+
+  // Rank-1 constant listing every dimension of node
+  auto shape_node = graph->nodes()->create<luci::CircleConst>();
+  shape_node->dtype(loco::DataType::S32);
+  shape_node->rank(1);
+  shape_node->dim(0).set(rank);
+  shape_node->size<loco::DataType::S32>(rank);
+  for (uint32_t axis = 0; axis < rank; ++axis)
+  {
+    assert(node->dim(axis).known()); // FIX_CALLER_UNLESS
+    shape_node->at<loco::DataType::S32>(axis) = node->dim(axis).value();
+  }
+  shape_node->shape_status(luci::ShapeStatus::VALID);
+  shape_node->name(node->name() + "/shape");
+  luci::add_origin(shape_node, luci::get_origin(node));
+
+  reshape_node->shape(shape_node);
+
+  return reshape_node;
+}
+
/**
* Replace Fully Connected with Batched MatMul
*
*
* [Node1] [Node2]
* \ /
- * [BatchMatMul] [BiasValue]?
+ * [BatchMatMul]
+ * |
+ * [Reshape] [BiasValue]?
* \ /
* [Add]?
* |
* [Activation]?
*
* Nodes with "?" denote optional elements
+ * NOTE Reshape Op is inserted to keep the original shape of FullyConnected Op
+ * Reshape Op can be redundant (input shape == output shape). This can be removed
+ * by RemoveUnnecessaryReshapePass.
*/
bool replace_fc_with_matmul(luci::CircleFullyConnected *fc)
{
luci::CircleNode *x = nullptr;
luci::CircleNode *y = nullptr;
- luci::CircleNode *b = nullptr;
luci::CircleTranspose *ty = nullptr;
luci::CircleTranspose *tx = nullptr;
bool adj_x = false;
x = loco::must_cast<luci::CircleNode *>(fc->input());
}
- b = loco::must_cast<luci::CircleNode *>(fc->bias());
+ if (x->dtype() != loco::DataType::FLOAT32 || y->dtype() != loco::DataType::FLOAT32)
+ return false;
- if (x->dtype() != loco::DataType::FLOAT32 || y->dtype() != loco::DataType::FLOAT32 ||
- b->dtype() != loco::DataType::FLOAT32)
+ auto bc = dynamic_cast<luci::CircleConst *>(fc->bias());
+ // NOTE bias can be empty as CircleOutputExclude type
+ // NOTE we can only handle bias as FLOAT32 type as of now
+ if (nullptr != bc && bc->dtype() != loco::DataType::FLOAT32)
return false;
auto name = fc->name();
luci::add_origin(matmul, luci::get_origin(fc));
+ auto reshape = create_reshape(fc);
+ reshape->tensor(matmul);
+
auto all_zero = [](const luci::CircleConst *c) {
bool ac = true;
for (uint32_t i = 0; i < c->size<loco::DataType::FLOAT32>() && ac; i++)
return ac;
};
- auto bc = dynamic_cast<luci::CircleConst *>(b);
- if ((nullptr != bc) && !all_zero(bc))
+ if (nullptr != bc && !all_zero(bc))
{
auto bias_add = fc->graph()->nodes()->create<luci::CircleAdd>();
- bias_add->x(matmul);
- bias_add->y(b);
+ bias_add->x(reshape);
+ bias_add->y(bc);
bias_add->name(fc->name() + "/bias_add");
bias_add->dtype(fc->dtype());
add_origin(bias_add, get_origin(fc));
}
else
{
- auto n = fromActivation(matmul, fc->fusedActivationFunction());
+ // NOTE bias doesn't exist or bias is all zero
+ auto n = fromActivation(reshape, fc->fusedActivationFunction());
add_origin(n, luci::get_origin(fc));
n->name(fc->name() + "fusedActivation");
n->dtype(fc->dtype());
auto ret = pass.run(g.g());
EXPECT_EQ(true, ret);
- auto mm = dynamic_cast<luci::CircleBatchMatMul *>(g.output()->from());
- EXPECT_NE(nullptr, mm);
+ auto res = dynamic_cast<luci::CircleReshape *>(g.output()->from());
+ EXPECT_NE(nullptr, res);
}
TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, nonzero_bias_test)
}
auto empty_bias = graph->nodes()->create<luci::CircleOutputExclude>();
- empty_bias->dtype(loco::DataType::FLOAT32); // Needed for type inference
auto fc_node = graph->nodes()->create<luci::CircleFullyConnected>();
fc_node->input(lhs);
#include <loco.h>
#include <oops/InternalExn.h>
+#include <limits> // std::numeric_limits
#include <flatbuffers/flexbuffers.h>
#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Service/Nodes/CircleConst.h>
+#include <limits> // std::numeric_limits
+
namespace
{
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h"
+
+#include "helpers/NodeFiller.h"
+#include "helpers/TypeMapper.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <string>
+#include <vector>
+
+/**
+ * BEFORE
+ * [CircleNode]
+ * |
+ * [UnidirectionalSequenceLSTM]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * |
+ * [CircleTranspose]
+ * |
+ * [CircleUnpack]
+ * |
+ * [CircleUnpackOut]
+ * |
+ * (Unrolled sub network)
+ * |
+ * [CirclePack]
+ * | |
+ * [CircleTranspose] [UnidirectionalSequenceLSTM]
+ * | |
+ * [CircleNode]
+ *
+ * NOTE for timesteps = 1,
+ * first [CircleTranspose] is not added and
+ * last [CirclePack] + [CircleTranspose] is replaced with [CircleReshape]
+ *
+ * First unrolled sub network is as follows
+ * - [] and 'Circle' are omitted
+ * - all FC has one or two Const for Weight/Bias
+ *
+ * (input)
+ * |
+ * FC
+ * |
+ * Split
+ * +---------+----------+----------+
+ * | | | |
+ * | Logistic Logistic Tanh
+ * | Const | | |
+ * | | | | |
+ * | +-- Mul +-- Mul ---+
+ * | | |
+ * | +---- Add ------+
+ * | |
+ * | +----+----+
+ * | | |
+ * Logistic Tanh |
+ * | | |
+ * +-- Mul ----+ |
+ * | |
+ * (output) (A)
+ *
+ * and following unrolled sub networks are;
+ *
+ * (prev-output) (input)
+ * | |
+ * FC FC
+ * | |
+ * +--- Add --+
+ * Const |
+ * | |
+ * +------ Add
+ * |
+ * Split
+ * |
+ * +---------+----------+----------+
+ * SplitOut SplitOut SplitOut SplitOut
+ * | | | |
+ * | Logistic Logistic Tanh
+ * | (A') | | |
+ * | | | | |
+ * | +--- Mul +-- Mul ---+
+ * | | |
+ * | +---- Add ------+
+ * | |
+ * | +----+----+
+ * | | |
+ * Logistic Tanh |
+ * | | |
+ * +-- Mul ----+ |
+ * | |
+ * (output) (next)
+ *
+ * where (A) and (A') are connected
+ *
+ */
+
+namespace
+{
+
+// Helper that creates the nodes of the unrolled sub network for one
+// UnidirectionalSequenceLSTM. All data members are filled in by unroll_lstm()
+// before any of the builder methods is called.
+struct UnrollLSTM
+{
+ // S32 permutation constant {1, 0, 2}
+ luci::CircleConst *transpose_perm(void);
+ // Transpose placed on the LSTM input
+ luci::CircleTranspose *first_transpose(luci::CircleNode *input);
+ // Unpack along axis 0 with one UnpackOut per timestep
+ std::vector<luci::CircleUnpackOut *> input_unpacks(luci::CircleNode *input);
+ // Concatenate four FLOAT32 constants into one constant along dim 0
+ luci::CircleConst *merged_weights(luci::CircleConst *iw, luci::CircleConst *fw,
+ luci::CircleConst *cw, luci::CircleConst *ow);
+ // FullyConnected of the first step (merged input weights and merged bias)
+ luci::CircleFullyConnected *create_input_matmul(luci::CircleNode *input);
+ // FC(input) + FC(previous output 'mul') + merged bias, for steps after the first
+ luci::CircleAdd *create_input_matmul(luci::CircleNode *input, luci::CircleMul *mul,
+ uint32_t step);
+ // Split 'input' into four parts and return the four SplitOut nodes
+ std::vector<luci::CircleSplitOut *> matmul_splits(luci::CircleNode *input, uint32_t step);
+ // All-zero FLOAT32 constant of shape [_batch, _units]
+ luci::CircleConst *forget_zero(void);
+ // Logistic/Tanh/Mul/Add network of one step; the inner Add is returned via retadd
+ luci::CircleMul *forget_gate_cell(std::vector<luci::CircleSplitOut *> &splits,
+ luci::CircleNode *prev, uint32_t step,
+ luci::CircleNode **retadd);
+ // Reshape to [_batch, _timesteps, _units]
+ luci::CircleReshape *last_reshape(luci::CircleNode *input);
+ // Pack of all step outputs followed by a Transpose
+ luci::CircleTranspose *last_transpose(std::vector<luci::CircleMul *> &output_muls);
+
+ luci::CircleUnidirectionalSequenceLSTM *_lstm{nullptr}; // node being unrolled
+ loco::Graph::NodeContext *_nctx{nullptr}; // used to create all new nodes
+ std::string _name; // prefix for the names of created nodes
+ uint32_t _batch{0};
+ uint32_t _timesteps{0};
+ uint32_t _units{0}; // output space dim
+};
+
+// Create an S32 constant of shape [3] holding the permutation {1, 0, 2}.
+// Name and origin are left for the caller to set.
+luci::CircleConst *UnrollLSTM::transpose_perm(void)
+{
+  const int32_t order[3] = {1, 0, 2};
+
+  auto perm_const = _nctx->create<luci::CircleConst>();
+  perm_const->dtype(loco::DataType::S32);
+  perm_const->rank(1);
+  perm_const->dim(0) = 3;
+  perm_const->size<loco::DataType::S32>(3);
+  for (uint32_t i = 0; i < 3; ++i)
+    perm_const->at<loco::DataType::S32>(i) = order[i];
+  perm_const->shape_status(luci::ShapeStatus::VALID);
+
+  return perm_const;
+}
+
+// Create a Transpose of 'input' that uses the permutation from transpose_perm().
+luci::CircleTranspose *UnrollLSTM::first_transpose(luci::CircleNode *input)
+{
+  assert(input != nullptr);
+
+  // Permutation constant
+  auto perm_const = transpose_perm();
+  perm_const->name(_name + "_perm1");
+  luci::add_origin(perm_const, luci::get_origin(_lstm));
+
+  // Transpose node
+  auto trans = _nctx->create<luci::CircleTranspose>();
+  trans->a(input);
+  trans->perm(perm_const);
+  trans->name(_name + "_trans1");
+  luci::add_origin(trans, luci::get_origin(_lstm));
+
+  return trans;
+}
+
+// Unpack 'input' along axis 0 into _timesteps pieces and return the UnpackOut
+// node of every piece, ordered by index.
+std::vector<luci::CircleUnpackOut *> UnrollLSTM::input_unpacks(luci::CircleNode *input)
+{
+  assert(input != nullptr);
+
+  // NOTE unpack input can be LSTM or Transpose
+  auto unpack_node = _nctx->create<luci::CircleUnpack>();
+  unpack_node->num(_timesteps);
+  unpack_node->axis(0);
+  unpack_node->value(input);
+  unpack_node->name(_name + "_unpack");
+  luci::add_origin(unpack_node, luci::get_origin(_lstm));
+
+  std::vector<luci::CircleUnpackOut *> unpack_outs;
+  uint32_t step = 0;
+  while (step < _timesteps)
+  {
+    auto out = _nctx->create<luci::CircleUnpackOut>();
+    out->input(unpack_node);
+    out->index(step);
+    out->name(_name + "_unpackout_" + std::to_string(step));
+    luci::add_origin(out, luci::get_origin(_lstm));
+    unpack_outs.push_back(out);
+    ++step;
+  }
+
+  return unpack_outs;
+}
+
+/**
+ * @brief Merge four FLOAT32 constants of identical shape into a single constant
+ *
+ * The result has the shape of 'iw' except that dim 0 is four times larger.
+ * Elements are laid out as [iw..., fw..., cw..., ow...]. Name and origin of the
+ * returned node are left for the caller to set.
+ *
+ * @note All preconditions are checked with assert only.
+ */
+luci::CircleConst *UnrollLSTM::merged_weights(luci::CircleConst *iw, luci::CircleConst *fw,
+                                              luci::CircleConst *cw, luci::CircleConst *ow)
+{
+  assert(iw != nullptr);
+  assert(fw != nullptr);
+  assert(cw != nullptr);
+  assert(ow != nullptr);
+
+  auto iw_rank = iw->rank();
+  assert(iw_rank >= 1); // dim(0) is scaled below
+  assert(iw_rank == fw->rank());
+  assert(iw_rank == cw->rank());
+  assert(iw_rank == ow->rank());
+
+  // number of elements of one source tensor
+  uint32_t ne_w = 1;
+  for (uint32_t i = 0; i < iw_rank; i++)
+  {
+    // ne_w elements are read from each of the four tensors, so every dimension
+    // has to agree, not just the rank
+    assert(iw->dim(i).value() == fw->dim(i).value());
+    assert(iw->dim(i).value() == cw->dim(i).value());
+    assert(iw->dim(i).value() == ow->dim(i).value());
+    ne_w *= iw->dim(i).value();
+  }
+
+  assert(iw->dtype() == loco::DataType::FLOAT32);
+  assert(fw->dtype() == loco::DataType::FLOAT32);
+  assert(cw->dtype() == loco::DataType::FLOAT32);
+  assert(ow->dtype() == loco::DataType::FLOAT32);
+
+  // merged weights
+  auto mw = _nctx->create<luci::CircleConst>();
+  mw->dtype(iw->dtype());
+  mw->rank(iw_rank);
+  mw->dim(0) = 4u * iw->dim(0).value();
+  for (uint32_t i = 1; i < iw_rank; i++)
+    mw->dim(i) = iw->dim(i);
+  mw->size<loco::DataType::FLOAT32>(4 * ne_w);
+  mw->shape_status(luci::ShapeStatus::VALID);
+  for (uint32_t i = 0; i < ne_w; ++i)
+  {
+    mw->at<loco::DataType::FLOAT32>(i + ne_w * 0) = iw->at<loco::DataType::FLOAT32>(i);
+    mw->at<loco::DataType::FLOAT32>(i + ne_w * 1) = fw->at<loco::DataType::FLOAT32>(i);
+    mw->at<loco::DataType::FLOAT32>(i + ne_w * 2) = cw->at<loco::DataType::FLOAT32>(i);
+    mw->at<loco::DataType::FLOAT32>(i + ne_w * 3) = ow->at<loco::DataType::FLOAT32>(i);
+  }
+  return mw;
+}
+
+// Create the FullyConnected of the first step: 'input' multiplied with the
+// merged input-to-{input,forget,cell,output} weights, plus the merged gate bias.
+luci::CircleFullyConnected *UnrollLSTM::create_input_matmul(luci::CircleNode *input)
+{
+  assert(input != nullptr);
+
+  // merged weights of the four gates
+  auto w_input = loco::must_cast<luci::CircleConst *>(_lstm->input_to_input_weights());
+  auto w_forget = loco::must_cast<luci::CircleConst *>(_lstm->input_to_forget_weights());
+  auto w_cell = loco::must_cast<luci::CircleConst *>(_lstm->input_to_cell_weights());
+  auto w_output = loco::must_cast<luci::CircleConst *>(_lstm->input_to_output_weights());
+
+  auto fc_weights = merged_weights(w_input, w_forget, w_cell, w_output);
+  fc_weights->name(_name + "_fc_w");
+  luci::add_origin(fc_weights, luci::get_origin(_lstm));
+
+  // merged bias of the four gates
+  auto b_input = loco::must_cast<luci::CircleConst *>(_lstm->input_gate_bias());
+  auto b_forget = loco::must_cast<luci::CircleConst *>(_lstm->forget_gate_bias());
+  auto b_cell = loco::must_cast<luci::CircleConst *>(_lstm->cell_gate_bias());
+  auto b_output = loco::must_cast<luci::CircleConst *>(_lstm->output_gate_bias());
+
+  auto fc_bias = merged_weights(b_input, b_forget, b_cell, b_output);
+  fc_bias->name(_name + "_fc_b");
+  luci::add_origin(fc_bias, luci::get_origin(_lstm));
+
+  // FullyConnected without fused activation
+  auto fc_node = _nctx->create<luci::CircleFullyConnected>();
+  fc_node->input(input);
+  fc_node->weights(fc_weights);
+  fc_node->bias(fc_bias);
+  fc_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+  fc_node->name(_name + "_fc");
+  luci::add_origin(fc_node, luci::get_origin(_lstm));
+
+  return fc_node;
+}
+
+/**
+ * @brief Create the matmul part of a step after the first one
+ *
+ * Builds FC(input, merged input weights) + FC(mul, merged recurrent weights)
+ * + merged gate bias. Both FullyConnected nodes get a CircleOutputExclude as
+ * bias; the bias is added by a separate Add.
+ *
+ * @param input  input of this step
+ * @param mul    output Mul of the previous step
+ * @param step   index of this step, used only for node names
+ * @return the last Add (bias add)
+ */
+luci::CircleAdd *UnrollLSTM::create_input_matmul(luci::CircleNode *input, luci::CircleMul *mul,
+                                                 uint32_t step)
+{
+  assert(input != nullptr);
+  assert(mul != nullptr);
+  assert(step < _timesteps);
+
+  auto base_name = _name + "_matmul" + std::to_string(step);
+
+  // input weights
+  auto iw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_input_weights());
+  auto fw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_forget_weights());
+  auto cw = loco::must_cast<luci::CircleConst *>(_lstm->input_to_cell_weights());
+  auto ow = loco::must_cast<luci::CircleConst *>(_lstm->input_to_output_weights());
+
+  auto fcw = merged_weights(iw, fw, cw, ow);
+  fcw->name(base_name + "_fc_w");
+  luci::add_origin(fcw, luci::get_origin(_lstm));
+
+  auto fcb = _nctx->create<luci::CircleOutputExclude>();
+
+  auto fc = _nctx->create<luci::CircleFullyConnected>();
+  fc->input(input);
+  fc->weights(fcw);
+  fc->bias(fcb);
+  fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+  fc->name(base_name + "_fc");
+  luci::add_origin(fc, luci::get_origin(_lstm));
+
+  // recurrent weights
+  auto ri = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_input_weights());
+  auto rf = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_forget_weights());
+  auto rc = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_cell_weights());
+  auto ro = loco::must_cast<luci::CircleConst *>(_lstm->recurrent_to_output_weights());
+
+  auto fcrw = merged_weights(ri, rf, rc, ro);
+  fcrw->name(base_name + "_fcr_w");
+  luci::add_origin(fcrw, luci::get_origin(_lstm));
+
+  auto fcrb = _nctx->create<luci::CircleOutputExclude>();
+
+  auto fcr = _nctx->create<luci::CircleFullyConnected>();
+  fcr->input(mul);
+  fcr->weights(fcrw);
+  fcr->bias(fcrb);
+  fcr->fusedActivationFunction(luci::FusedActFunc::NONE);
+  fcr->name(base_name + "_fcr");
+  luci::add_origin(fcr, luci::get_origin(_lstm));
+
+  auto add_fc = _nctx->create<luci::CircleAdd>();
+  add_fc->x(fcr);
+  add_fc->y(fc);
+  add_fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+  add_fc->name(base_name + "_addfc");
+  luci::add_origin(add_fc, luci::get_origin(_lstm));
+
+  // bias
+  auto ib = loco::must_cast<luci::CircleConst *>(_lstm->input_gate_bias());
+  auto fb = loco::must_cast<luci::CircleConst *>(_lstm->forget_gate_bias());
+  auto cb = loco::must_cast<luci::CircleConst *>(_lstm->cell_gate_bias());
+  auto ob = loco::must_cast<luci::CircleConst *>(_lstm->output_gate_bias());
+
+  auto bias = merged_weights(ib, fb, cb, ob);
+  bias->name(base_name + "_bias");
+  // record the origin like for every other constant created from the LSTM
+  luci::add_origin(bias, luci::get_origin(_lstm));
+
+  auto add_bias = _nctx->create<luci::CircleAdd>();
+  add_bias->x(add_fc);
+  add_bias->y(bias);
+  add_bias->fusedActivationFunction(luci::FusedActFunc::NONE);
+  add_bias->name(base_name + "_addbias");
+  luci::add_origin(add_bias, luci::get_origin(_lstm));
+
+  return add_bias;
+}
+
+/**
+ * @brief Split 'input' into four parts along axis 1
+ *
+ * Creates the S32 split_dim constant (value 1), the Split node and one
+ * SplitOut per part.
+ *
+ * @param input  node to split
+ * @param step   index of the step, used only for node names
+ * @return the four SplitOut nodes, ordered by index
+ */
+std::vector<luci::CircleSplitOut *> UnrollLSTM::matmul_splits(luci::CircleNode *input,
+                                                              uint32_t step)
+{
+  assert(input != nullptr);
+  assert(step < _timesteps);
+
+  // number of parts; drives both num_split and the number of SplitOut nodes
+  constexpr uint32_t num_splits = 4;
+
+  std::string split_name = _name + "_sp" + std::to_string(step);
+
+  auto split_dim = _nctx->create<luci::CircleConst>();
+  split_dim->dtype(loco::DataType::S32);
+  split_dim->rank(1);
+  split_dim->dim(0) = 1;
+  split_dim->size<loco::DataType::S32>(1);
+  split_dim->at<loco::DataType::S32>(0) = 1;
+  split_dim->shape_status(luci::ShapeStatus::VALID);
+  split_dim->name(split_name + "_dim");
+  luci::add_origin(split_dim, luci::get_origin(_lstm));
+
+  auto split = _nctx->create<luci::CircleSplit>();
+  split->num_split(num_splits);
+  split->split_dim(split_dim);
+  split->input(input);
+  split->name(split_name);
+  luci::add_origin(split, luci::get_origin(_lstm));
+
+  // one SplitOut per part, named "<split_name>_spo<index>"
+  std::vector<luci::CircleSplitOut *> outs;
+  for (uint32_t idx = 0; idx < num_splits; ++idx)
+  {
+    auto split_out = _nctx->create<luci::CircleSplitOut>();
+    split_out->input(split);
+    split_out->index(idx);
+    split_out->name(split_name + "_spo" + std::to_string(idx));
+    luci::add_origin(split_out, luci::get_origin(_lstm));
+    outs.push_back(split_out);
+  }
+  return outs;
+}
+
+// Create a FLOAT32 constant of shape [_batch, _units] filled with zeros.
+luci::CircleConst *UnrollLSTM::forget_zero(void)
+{
+  const uint32_t num_elements = _batch * _units;
+
+  auto zero_const = _nctx->create<luci::CircleConst>();
+  zero_const->dtype(loco::DataType::FLOAT32);
+  zero_const->rank(2);
+  zero_const->dim(0) = _batch;
+  zero_const->dim(1) = _units;
+  zero_const->size<loco::DataType::FLOAT32>(num_elements);
+  uint32_t pos = 0;
+  while (pos < num_elements)
+  {
+    zero_const->at<loco::DataType::FLOAT32>(pos) = 0.0f;
+    ++pos;
+  }
+  zero_const->shape_status(luci::ShapeStatus::VALID);
+  zero_const->name(_name + "_zero");
+  luci::add_origin(zero_const, luci::get_origin(_lstm));
+
+  return zero_const;
+}
+
+/**
+ * @brief Create the gate network of one step
+ *
+ * Builds, from the four split outputs and 'prev':
+ *   add = Logistic(splits[0]) * Tanh(splits[2]) + Logistic(splits[1]) * prev
+ *   out = Logistic(splits[3]) * Tanh(add)
+ *
+ * @param splits  exactly four SplitOut nodes
+ * @param prev    second operand of the Mul with Logistic(splits[1])
+ * @param step    index of the step, used only for node names
+ * @param retadd  when not null, receives the 'add' node
+ * @return the 'out' Mul
+ */
+luci::CircleMul *UnrollLSTM::forget_gate_cell(std::vector<luci::CircleSplitOut *> &splits,
+                                              luci::CircleNode *prev, uint32_t step,
+                                              luci::CircleNode **retadd)
+{
+  // splits[0] .. splits[3] are read below
+  assert(splits.size() == 4);
+  assert(prev != nullptr);
+  assert(step < _timesteps);
+
+  std::string net_name = _name + "_net" + std::to_string(step);
+
+  auto split_0 = splits[0]; // input-input : Logistic - Mul(c) - Add - Tanh - Mul
+  auto split_1 = splits[1]; // input-forget : Logistic - Mul(p) - Add - Tanh - Mul
+  auto split_2 = splits[2]; // input-cell : Tanh - Mul(c) - Add - Tanh - Mul
+  auto split_3 = splits[3]; // input-output : Logistic - Mul
+
+  auto logis_0 = _nctx->create<luci::CircleLogistic>();
+  logis_0->x(split_0);
+  logis_0->name(net_name + "_log0");
+  luci::add_origin(logis_0, luci::get_origin(_lstm));
+
+  auto logis_1 = _nctx->create<luci::CircleLogistic>();
+  logis_1->x(split_1);
+  logis_1->name(net_name + "_log1");
+  luci::add_origin(logis_1, luci::get_origin(_lstm));
+
+  auto tanh_2 = _nctx->create<luci::CircleTanh>();
+  tanh_2->x(split_2);
+  tanh_2->name(net_name + "_tanh2");
+  luci::add_origin(tanh_2, luci::get_origin(_lstm));
+
+  auto logis_3 = _nctx->create<luci::CircleLogistic>();
+  logis_3->x(split_3);
+  logis_3->name(net_name + "_log3");
+  luci::add_origin(logis_3, luci::get_origin(_lstm));
+
+  auto mul_c = _nctx->create<luci::CircleMul>();
+  mul_c->x(logis_0);
+  mul_c->y(tanh_2);
+  mul_c->fusedActivationFunction(luci::FusedActFunc::NONE);
+  mul_c->name(net_name + "_mul1");
+  luci::add_origin(mul_c, luci::get_origin(_lstm));
+
+  auto mul_p = _nctx->create<luci::CircleMul>();
+  mul_p->x(logis_1);
+  mul_p->y(prev);
+  mul_p->fusedActivationFunction(luci::FusedActFunc::NONE);
+  mul_p->name(net_name + "_mul2");
+  luci::add_origin(mul_p, luci::get_origin(_lstm));
+
+  auto add_cp = _nctx->create<luci::CircleAdd>();
+  add_cp->x(mul_c);
+  add_cp->y(mul_p);
+  add_cp->fusedActivationFunction(luci::FusedActFunc::NONE);
+  add_cp->name(net_name + "_add1");
+  luci::add_origin(add_cp, luci::get_origin(_lstm));
+
+  // hand the Add back to the caller, which feeds it as 'prev' of the next step
+  if (retadd != nullptr)
+    *retadd = add_cp;
+
+  auto tanh_cp = _nctx->create<luci::CircleTanh>();
+  tanh_cp->x(add_cp);
+  tanh_cp->name(net_name + "_tanh3");
+  luci::add_origin(tanh_cp, luci::get_origin(_lstm));
+
+  auto mul_out = _nctx->create<luci::CircleMul>();
+  mul_out->x(logis_3);
+  mul_out->y(tanh_cp);
+  mul_out->fusedActivationFunction(luci::FusedActFunc::NONE);
+  mul_out->name(net_name + "_mul3");
+  luci::add_origin(mul_out, luci::get_origin(_lstm));
+
+  return mul_out;
+}
+
+// Create a Reshape of 'input' to [_batch, _timesteps, _units]. The target shape
+// is set both as an S32 shape constant and in newShape of the Reshape.
+luci::CircleReshape *UnrollLSTM::last_reshape(luci::CircleNode *input)
+{
+  assert(input != nullptr);
+
+  const uint32_t out_dims[3] = {_batch, _timesteps, _units};
+
+  // shape constant
+  auto shape_const = _nctx->create<luci::CircleConst>();
+  shape_const->dtype(loco::DataType::S32);
+  shape_const->rank(1);
+  shape_const->dim(0) = 3;
+  shape_const->size<loco::DataType::S32>(3);
+  for (uint32_t i = 0; i < 3; ++i)
+    shape_const->at<loco::DataType::S32>(i) = out_dims[i];
+  shape_const->shape_status(luci::ShapeStatus::VALID);
+  shape_const->name(_name + "_reshape_s");
+  luci::add_origin(shape_const, luci::get_origin(_lstm));
+
+  // Reshape node
+  auto reshape_node = _nctx->create<luci::CircleReshape>();
+  reshape_node->tensor(input);
+  reshape_node->shape(shape_const);
+  reshape_node->newShape()->rank(3);
+  for (uint32_t i = 0; i < 3; ++i)
+    reshape_node->newShape()->dim(i) = out_dims[i];
+  reshape_node->name(_name + "_reshape");
+  luci::add_origin(reshape_node, luci::get_origin(_lstm));
+
+  return reshape_node;
+}
+
+// Pack the output Mul of every step along axis 0 and transpose the result with
+// the permutation from transpose_perm().
+luci::CircleTranspose *UnrollLSTM::last_transpose(std::vector<luci::CircleMul *> &output_muls)
+{
+  assert(output_muls.size() == _timesteps);
+
+  // Pack with one value per step
+  auto pack_node = _nctx->create<luci::CirclePack>(_timesteps);
+  pack_node->axis(0);
+  uint32_t step = 0;
+  while (step < _timesteps)
+  {
+    pack_node->values(step, output_muls[step]);
+    ++step;
+  }
+  pack_node->name(_name + "_pack");
+  luci::add_origin(pack_node, luci::get_origin(_lstm));
+
+  // Permutation constant
+  auto perm_const = transpose_perm();
+  perm_const->name(_name + "_perm2");
+  luci::add_origin(perm_const, luci::get_origin(_lstm));
+
+  // Transpose node
+  auto trans = _nctx->create<luci::CircleTranspose>();
+  trans->a(pack_node);
+  trans->perm(perm_const);
+  trans->name(_name + "_trans2");
+  luci::add_origin(trans, luci::get_origin(_lstm));
+
+  return trans;
+}
+
+/**
+ * @brief Replace one UnidirectionalSequenceLSTM with an unrolled sub network
+ *
+ * @return true when the LSTM was replaced; false when it is left untouched
+ *         (rank of LSTM or of its input is not 3, a dimension of the LSTM is
+ *         unknown, or there is no timestep to unroll)
+ */
+bool unroll_lstm(luci::CircleUnidirectionalSequenceLSTM *lstm)
+{
+  // NOTE shape of input of lstm is interpreted as [batch, timesteps, feature]
+  //      shape of output of lstm is interpreted as [batch, timesteps, units]
+  // TODO add more conditions to check LSTM
+  assert(lstm != nullptr);
+  assert(lstm->rank() == 3); // use assert to find out when this happens
+  if (lstm->rank() != 3)
+    return false;
+  if (!(lstm->dim(0).known() and lstm->dim(1).known() and lstm->dim(2).known()))
+    return false;
+
+  UnrollLSTM ulstm;
+  ulstm._lstm = lstm;
+  ulstm._nctx = lstm->graph()->nodes();
+  ulstm._name = lstm->name();
+  ulstm._batch = lstm->dim(0).value();
+  ulstm._timesteps = lstm->dim(1).value();
+  ulstm._units = lstm->dim(2).value(); // output space dim
+
+  // Nothing to unroll. This also keeps unpacks[0] below from indexing an
+  // empty vector, and it is checked before any node is created.
+  if (ulstm._timesteps == 0)
+    return false;
+
+  luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(lstm->input());
+  assert(input->rank() == 3); // use assert to find out when this happens
+  if (input->rank() != 3)
+    return false;
+  assert(input->dim(0).value() == ulstm._batch);
+  assert(input->dim(1).value() == ulstm._timesteps);
+
+  if (ulstm._timesteps > 1)
+  {
+    // Transpose to switch batch <-> timesteps
+    // NOTE TF uses Reshape when batch is 1 but as there is Transpose->Reshape
+    //      Pass, we can just use Transpose for both cases
+    auto transpose = ulstm.first_transpose(input);
+    input = transpose;
+  }
+
+  auto unpacks = ulstm.input_unpacks(input);
+  assert(unpacks.size() == ulstm._timesteps);
+  uint32_t step = 0;
+  auto unpackout = unpacks[step];
+
+  // First FC
+  auto fc_1 = ulstm.create_input_matmul(unpackout);
+  assert(fc_1 != nullptr);
+  auto splits = ulstm.matmul_splits(fc_1, step);
+  assert(splits.size() == 4);
+
+  luci::CircleNode *prev = nullptr; // prev step CircleAdd
+  luci::CircleNode *this_add = nullptr;
+
+  prev = ulstm.forget_zero(); // provide all zero constant for first step
+
+  std::vector<luci::CircleMul *> output_muls;
+  auto mul_gc = ulstm.forget_gate_cell(splits, prev, step, &this_add);
+  assert(mul_gc != nullptr);
+  assert(this_add != nullptr);
+  // gather all Muls for last Pack
+  output_muls.push_back(mul_gc);
+
+  // Remaining steps: each one consumes the output Mul and the Add of the
+  // previous step
+  for (step = 1; step < ulstm._timesteps; ++step)
+  {
+    auto step_input = unpacks[step];
+    auto add_n = ulstm.create_input_matmul(step_input, mul_gc, step);
+
+    auto step_splits = ulstm.matmul_splits(add_n, step);
+    assert(step_splits.size() == 4);
+
+    prev = this_add;
+    mul_gc = ulstm.forget_gate_cell(step_splits, prev, step, &this_add);
+    assert(mul_gc != nullptr);
+    assert(this_add != nullptr);
+
+    output_muls.push_back(mul_gc);
+  }
+  assert(output_muls.size() == ulstm._timesteps);
+
+  if (ulstm._timesteps == 1)
+  {
+    // Reshape for single step
+    auto reshape = ulstm.last_reshape(mul_gc);
+    loco::replace(lstm).with(reshape);
+  }
+  else
+  {
+    // Pack + Transpose for two or more steps
+    auto transpose = ulstm.last_transpose(output_muls);
+    loco::replace(lstm).with(transpose);
+  }
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * @brief Try to unroll every active UnidirectionalSequenceLSTM node of the graph
+ * @return true if at least one node was unrolled
+ */
+bool UnrollUnidirectionalSequenceLSTMPass::run(loco::Graph *g)
+{
+  bool any_unrolled = false;
+
+  for (auto candidate : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto lstm = dynamic_cast<luci::CircleUnidirectionalSequenceLSTM *>(candidate);
+    if (lstm == nullptr)
+      continue;
+
+    // unroll_lstm() is evaluated first so it runs for every LSTM node
+    any_unrolled = unroll_lstm(lstm) || any_unrolled;
+  }
+
+  return any_unrolled;
+}
+
+} // namespace luci
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/Nodes/CircleUnidirectionalSequenceLSTM.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+/**
+ * @brief Graphlet holding a UnidirectionalSequenceLSTM node together with the
+ *        constant and OutputExclude nodes that a test graph wires to it
+ */
+class UniSeqLSTMGraphlet
+{
+public:
+  UniSeqLSTMGraphlet() = default;
+
+  /**
+   * @brief Create the LSTM node with shape oshape plus its helper nodes in g
+   */
+  void init(loco::Graph *g, const ShapeU32 oshape)
+  {
+    auto nodes = g->nodes();
+
+    _uslstm = nodes->create<luci::CircleUnidirectionalSequenceLSTM>();
+    _uslstm->name("uslstm");
+    _uslstm->dtype(loco::DataType::FLOAT32);
+    _uslstm->shape(oshape);
+
+    _uslstm->fusedActivationFunction(luci::FusedActFunc::TANH);
+    _uslstm->cell_clip(0.0);
+    _uslstm->proj_clip(0.0);
+    _uslstm->time_major(false);
+    _uslstm->asymmetric_quantize_inputs(false);
+
+    _iw = weight_1x1(g);
+    _rw = weight_1x1(g);
+    _gb = weight_1(g);
+    _ex = nodes->create<luci::CircleOutputExclude>();
+  }
+
+protected:
+  // FLOAT32 constant of shape [1, 1] holding 1.0
+  luci::CircleConst *weight_1x1(loco::Graph *g) { return unit_weight(g, 2); }
+
+  // FLOAT32 constant of shape [1] holding 1.0
+  luci::CircleConst *weight_1(loco::Graph *g) { return unit_weight(g, 1); }
+
+private:
+  // Build a FLOAT32 constant of the given rank whose every dimension is 1 and
+  // whose single element is 1.0
+  luci::CircleConst *unit_weight(loco::Graph *g, uint32_t rank)
+  {
+    auto w = g->nodes()->create<luci::CircleConst>();
+    w->dtype(loco::DataType::FLOAT32);
+    w->rank(rank);
+    for (uint32_t axis = 0; axis < rank; ++axis)
+      w->dim(axis) = 1;
+    w->size<loco::DataType::FLOAT32>(1);
+    w->at<loco::DataType::FLOAT32>(0) = 1.0;
+    w->shape_status(luci::ShapeStatus::VALID);
+    return w;
+  }
+
+protected:
+  luci::CircleUnidirectionalSequenceLSTM *_uslstm = nullptr;
+  luci::CircleConst *_iw = nullptr;       // shared by all input_to_* weights
+  luci::CircleConst *_rw = nullptr;       // shared by all recurrent_to_* weights
+  luci::CircleConst *_gb = nullptr;       // shared by all gate biases
+  luci::CircleOutputExclude *_ex = nullptr; // placeholder for unused inputs
+};
+
+/**
+ * @brief Test graph wiring Input -> UnidirectionalSequenceLSTM -> Output
+ */
+class UnrollUniSeqLSTMPassTestGraph : public TestIOGraph, public UniSeqLSTMGraphlet
+{
+public:
+  UnrollUniSeqLSTMPassTestGraph() = default;
+
+  /**
+   * @brief Build the graph with the given input and output shapes
+   */
+  void init(const ShapeU32 ishape, const ShapeU32 oshape)
+  {
+    TestIOGraph::init(ishape, oshape);
+    UniSeqLSTMGraphlet::init(g(), oshape);
+
+    auto const lstm = _uslstm;
+
+    lstm->input(input());
+
+    // All input weights share one [1, 1] constant
+    lstm->input_to_input_weights(_iw);
+    lstm->input_to_forget_weights(_iw);
+    lstm->input_to_cell_weights(_iw);
+    lstm->input_to_output_weights(_iw);
+
+    // All recurrent weights share one [1, 1] constant
+    lstm->recurrent_to_input_weights(_rw);
+    lstm->recurrent_to_forget_weights(_rw);
+    lstm->recurrent_to_cell_weights(_rw);
+    lstm->recurrent_to_output_weights(_rw);
+
+    // Cell-to-gate weights are wired to the OutputExclude placeholder
+    lstm->cell_to_input_weights(_ex);
+    lstm->cell_to_forget_weights(_ex);
+    lstm->cell_to_output_weights(_ex);
+
+    // All gate biases share one [1] constant
+    lstm->input_gate_bias(_gb);
+    lstm->forget_gate_bias(_gb);
+    lstm->cell_gate_bias(_gb);
+    lstm->output_gate_bias(_gb);
+
+    // Projection, states and layer-norm coefficients use the placeholder too
+    lstm->projection_weights(_ex);
+    lstm->projection_bias(_ex);
+
+    lstm->output_state(_ex);
+    lstm->cell_state(_ex);
+
+    lstm->input_layer_norm_coefficients(_ex);
+    lstm->forget_layer_norm_coefficients(_ex);
+    lstm->cell_layer_norm_coefficients(_ex);
+    lstm->output_layer_norm_coefficients(_ex);
+
+    output()->from(lstm);
+  }
+};
+
+} // namespace
+
+namespace
+{
+
+using namespace luci::test;
+
+/**
+ * @brief Graphlet with a single FakeQuant node, used for the simple negative test
+ */
+class FakeQuantGraphlet
+{
+public:
+  FakeQuantGraphlet() = default;
+
+  // Create the FakeQuant node in g and name it "fq"
+  void init(loco::Graph *g)
+  {
+    auto nodes = g->nodes();
+    _fq = nodes->create<luci::CircleFakeQuant>();
+    _fq->name("fq");
+  }
+
+protected:
+  luci::CircleFakeQuant *_fq = nullptr;
+};
+
+/**
+ * @brief Test graph wiring Input -> FakeQuant -> Output with {1, 1, 1} shapes
+ */
+class FakeQuantGraph : public TestIOGraph, public FakeQuantGraphlet
+{
+public:
+  FakeQuantGraph() = default;
+
+  void init(void)
+  {
+    const ShapeU32 io_shape = {1, 1, 1};
+
+    TestIOGraph::init(io_shape, io_shape);
+    FakeQuantGraphlet::init(g());
+
+    // Input feeds FakeQuant, FakeQuant feeds Output
+    _fq->inputs(input());
+    output()->from(_fq);
+  }
+};
+
+} // namespace
+
+// The pass must report a non-null name
+TEST(UnrollUnidirectionalSequenceLSTMPassTestName, name)
+{
+ luci::UnrollUnidirectionalSequenceLSTMPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+// Fixture for the positive case: a graph containing a UnidirectionalSequenceLSTM
+class UnrollUnidirectionalSequenceLSTMPassTest : public ::testing::Test
+{
+public:
+ UnrollUniSeqLSTMPassTestGraph g;
+ luci::UnrollUnidirectionalSequenceLSTMPass pass;
+};
+
+// With {1, 1, 1} input and output shapes the pass is expected to report a change
+TEST_F(UnrollUnidirectionalSequenceLSTMPassTest, simple_run)
+{
+ g.init({1, 1, 1}, {1, 1, 1});
+
+ EXPECT_TRUE(pass.run(g.g()));
+}
+
+// Fixture for the negative case: a graph containing only a FakeQuant node
+class UnrollUnidirectionalSequenceLSTMPassTestN : public ::testing::Test
+{
+public:
+ FakeQuantGraph g;
+ luci::UnrollUnidirectionalSequenceLSTMPass pass;
+};
+
+// No UnidirectionalSequenceLSTM in the graph, so the pass must report no change
+TEST_F(UnrollUnidirectionalSequenceLSTMPassTestN, simple_run_NEG)
+{
+ g.init();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
bool visit(const luci::CircleAdd *node)
{
+ // Skip granularity check for indices
+ if (node->dtype() == loco::DataType::S32 or node->dtype() == loco::DataType::S64)
+ return true;
+
RETURN_FALSE_UNLESS(is_lwq(node));
RETURN_FALSE_UNLESS(is_lwq(node->x()));
RETURN_FALSE_UNLESS(is_lwq(node->y()));
bool visit(const luci::CircleMul *node)
{
+ // Skip granularity check for indices
+ if (node->dtype() == loco::DataType::S32 or node->dtype() == loco::DataType::S64)
+ return true;
+
RETURN_FALSE_UNLESS(is_lwq(node));
RETURN_FALSE_UNLESS(is_lwq(node->x()));
RETURN_FALSE_UNLESS(is_lwq(node->y()));
template <loco::DataType Qtype, loco::DataType Btype>
bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleAdd *node)
{
+ // Allow add of indices
+ if (group_has_type(node, loco::DataType::S32) or group_has_type(node, loco::DataType::S64))
+ return true;
+
return group_has_type(node, Qtype);
}
template <loco::DataType Qtype, loco::DataType Btype>
bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMul *node)
{
+ // Allow mul of indices
+ if (group_has_type(node, loco::DataType::S32) or group_has_type(node, loco::DataType::S64))
+ return true;
+
return group_has_type(node, Qtype);
}
*/
template <class COMM_NODE> bool with_commutative_args_of(const COMM_NODE *node);
+ /**
+ * @note Similar to with_commutative_args_of, but the arguments are not commutative:
+ * x must match ARG_TYPE_1 (stored in _arg_1) and y must match ARG_TYPE_2 (stored in _arg_2).
+ */
+ template <class COMM_NODE> bool with_args_of(const COMM_NODE *node);
+
private:
ARG_TYPE_1 **_arg_1;
ARG_TYPE_2 **_arg_2;
return false;
}
+/**
+ * @brief Fill _arg_1 / _arg_2 from node->x() / node->y() without trying the swapped order
+ * @return true (and both outputs written) only when x is an ARG_TYPE_1 and y is an
+ *         ARG_TYPE_2; otherwise false and the outputs are left untouched
+ */
+template <class ARG_TYPE_1, class ARG_TYPE_2>
+template <class COMM_NODE>
+bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_args_of(const COMM_NODE *node)
+{
+  // X == ARG_TYPE_1 / Y == ARG_TYPE_2
+  auto const lhs = dynamic_cast<ARG_TYPE_1 *>(node->x());
+  auto const rhs = dynamic_cast<ARG_TYPE_2 *>(node->y());
+
+  if (lhs == nullptr || rhs == nullptr)
+    return false;
+
+  *_arg_1 = lhs;
+  *_arg_2 = rhs;
+  return true;
+}
+
} // namespace luci
#ifndef __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
#define __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
+#include <cstddef>
#include <cstdint>
#include <vector>
return loco::DataType::Unknown;
}
+/**
+ * @brief Convert a vector of type names to a vector of loco::DataType
+ * @param vec type names; each is lower-cased before being passed to str_to_dtype
+ * @return one loco::DataType per input string, in the same order
+ * @note The vector is only read. The non-const reference is kept so that the
+ *       signature matches the declaration in the header.
+ */
+std::vector<loco::DataType> str_vec_to_dtype_vec(std::vector<std::string> &vec)
+{
+  std::vector<loco::DataType> res;
+  // Size is known up front; avoid repeated reallocation
+  res.reserve(vec.size());
+  // Bind by const reference so each string is not copied before lower-casing
+  for (const std::string &s : vec)
+    res.push_back(str_to_dtype(to_lower_case(s)));
+  return res;
+}
+
QuantizationGranularity str_to_granularity(const std::string &str)
{
if (to_lower_case(str).compare("layer") == 0)
loco::DataType str_to_dtype(const std::string &);
+std::vector<loco::DataType> str_vec_to_dtype_vec(std::vector<std::string> &);
+
QuantizationGranularity str_to_granularity(const std::string &);
} // namespace luci
EXPECT_THROW(luci::str_to_granularity("foo"), std::runtime_error);
}
+
+// str_vec_to_dtype_vec must map each name to its dtype and keep the input order
+TEST(StringsTest, str_vec_to_dtype_vec)
+{
+ // Three valid names
+ std::vector<std::string> input1 = {"uint8", "int16", "float32"};
+ auto result1 = luci::str_vec_to_dtype_vec(input1);
+ ASSERT_EQ(3, result1.size());
+ ASSERT_EQ(loco::DataType::U8, result1[0]);
+ ASSERT_EQ(loco::DataType::S16, result1[1]);
+ ASSERT_EQ(loco::DataType::FLOAT32, result1[2]);
+
+ // An empty string is expected to map to Unknown rather than being dropped
+ std::vector<std::string> input2 = {"uint8", "int16", "float32", ""};
+ auto result2 = luci::str_vec_to_dtype_vec(input2);
+ ASSERT_EQ(4, result2.size());
+ ASSERT_EQ(loco::DataType::U8, result2[0]);
+ ASSERT_EQ(loco::DataType::S16, result2[1]);
+ ASSERT_EQ(loco::DataType::FLOAT32, result2[2]);
+ ASSERT_EQ(loco::DataType::Unknown, result2[3]);
+
+ // Single element
+ std::vector<std::string> input3 = {"uint8"};
+ auto result3 = luci::str_vec_to_dtype_vec(input3);
+ ASSERT_EQ(1, result3.size());
+ ASSERT_EQ(loco::DataType::U8, result3[0]);
+}
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_PASS_TEST_IO_GRAPH_H__
-#define __LUCI_PASS_TEST_IO_GRAPH_H__
-
-#include "TestShape.h"
-
-#include <luci/IR/CircleNodes.h>
-
-namespace luci
-{
-namespace test
-{
-
-/**
- * @brief Graphlet with Inputs and loco::Graph for multiple inputs
- * @note Every Graph will have Input(s) and Output(s)
- * We put loco::Graph only in IsGraphlet not to declare separate
- * class for loco::Graph
- */
-template <unsigned N> class TestIsGraphlet
-{
-public:
- TestIsGraphlet()
- {
- for (uint32_t n = 0; n < N; ++n)
- {
- _graph_inputs[n] = nullptr;
- _inputs[n] = nullptr;
- }
- }
-
-public:
- virtual void init(loco::Graph *g, const ShapeU32 shape_in)
- {
- for (uint32_t n = 0; n < N; ++n)
- {
- _graph_inputs[n] = g->inputs()->create();
-
- _inputs[n] = g->nodes()->create<luci::CircleInput>();
- _inputs[n]->shape(shape_in);
- _inputs[n]->shape_status(luci::ShapeStatus::VALID);
- _inputs[n]->dtype(loco::DataType::FLOAT32);
- _inputs[n]->name("input_" + std::to_string(n));
-
- _inputs[n]->index(_graph_inputs[n]->index());
-
- auto input_shape = std::make_unique<loco::TensorShape>();
- set_shape_vector(input_shape.get(), shape_in);
- _graph_inputs[n]->shape(std::move(input_shape));
- _graph_inputs[n]->dtype(loco::DataType::FLOAT32);
- }
- }
-
-public:
- loco::Graph *g(void) { return &_g; }
- luci::CircleInput *input(int idx) { return _inputs[idx]; }
-
-protected:
- loco::Graph _g;
- std::array<loco::GraphInput *, N> _graph_inputs;
- std::array<luci::CircleInput *, N> _inputs;
-};
-
-/**
- * @brief Graphlet with one Input
- */
-class TestIGraphlet : public TestIsGraphlet<1>
-{
-public:
- luci::CircleInput *input() { return _inputs[0]; }
-};
-
-/**
- * @brief Graphlet with Outputs for multiple outputs
- */
-template <unsigned N> class TestOsGraphlet
-{
-public:
- TestOsGraphlet()
- {
- for (uint32_t n = 0; n < N; ++n)
- {
- _graph_outputs[n] = nullptr;
- _outputs[n] = nullptr;
- }
- }
-
-public:
- virtual void init(loco::Graph *g, const ShapeU32 shape_out)
- {
- for (uint32_t n = 0; n < N; ++n)
- {
- _graph_outputs[n] = g->outputs()->create();
-
- _outputs[n] = g->nodes()->create<luci::CircleOutput>();
- _outputs[n]->shape(shape_out);
- _outputs[n]->shape_status(luci::ShapeStatus::VALID);
- _outputs[n]->dtype(loco::DataType::FLOAT32);
- _outputs[n]->name("output_" + std::to_string(n));
-
- _outputs[n]->index(_graph_outputs[n]->index());
-
- auto output_shape = std::make_unique<loco::TensorShape>();
- set_shape_vector(output_shape.get(), shape_out);
- _graph_outputs[n]->shape(std::move(output_shape));
- _graph_outputs[n]->dtype(loco::DataType::FLOAT32);
- }
- }
-
-public:
- luci::CircleOutput *output(int idx) { return _outputs[idx]; }
-
-protected:
- std::array<loco::GraphOutput *, N> _graph_outputs;
- std::array<luci::CircleOutput *, N> _outputs;
-};
-
-/**
- * @brief Graphlet with one Output
- */
-class TestOGraphlet : public TestOsGraphlet<1>
-{
-public:
- luci::CircleOutput *output() { return _outputs[0]; }
-};
-
-/**
- * @brief Graph with Input and Output
- */
-class TestIOGraph : public TestIGraphlet, public TestOGraphlet
-{
-public:
- TestIOGraph() = default;
-
-public:
- virtual void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
- {
- TestIsGraphlet<1>::init(g(), shape_in);
- TestOsGraphlet<1>::init(g(), shape_out);
- }
-};
-
-} // namespace test
-} // namespace luci
-
-#endif // __LUCI_PASS_TEST_IO_GRAPH_H__
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TestIOGraph.h"
-
-// This file validates "TestIOGraph.h". Pleaes DO NOT remove this file.
require("logo")
require("logo-core")
require("mio-circle04")
-require("mio-tflite280")
+require("luci-compute")
require("oops")
require("hermes")
require("hermes-std")
auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
auto weights_shape = luci::shape_get(node->weights()).as<loco::TensorShape>();
-// TODO Remove following unused code
-#if 0
- // Checking shape capability for fully connected layer
- // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
- // Weight: [# of units, K]
- // Output: [D1 * D2 * ... * Dn / K, # of units]
- if (input_shape.rank() < 2 || weights_shape.rank() != 2)
- {
- // Return node own shape if shape inference is not possible
- return use_own(node);
- }
-
- uint32_t input_size = 1;
- for (uint32_t i = 0; i < input_shape.rank(); i++)
- {
- input_size = input_size * input_shape.dim(i).value();
- }
- const uint32_t batch_size = input_size / weights_shape.dim(1).value();
- loco::TensorShape out_shape;
- out_shape.rank(2);
- out_shape.dim(0) = batch_size;
- out_shape.dim(1) = weights_shape.dim(0);
-#endif
-
loco::TensorShape out_shape;
// NOTE Some recipes in some repositories are using rank 4 input for FullyConnected.
loco::DataType visit(const luci::CircleOutputDummy *node) final { return node->dtype(); }
- loco::DataType visit(const luci::CircleOutputExclude *node) final { return node->dtype(); }
+ loco::DataType visit(const luci::CircleOutputExclude *node) final
+ {
+   // NOTE The dtype of CircleOutputExclude does not matter; report FLOAT32
+   //      in place of Unknown to make type inference happy.
+   auto const dtype = node->dtype();
+   return (dtype == loco::DataType::Unknown) ? loco::DataType::FLOAT32 : dtype;
+ }
loco::DataType visit(const luci::CircleCustomOut *node) final { return node->dtype(); }
{
cloned->fusedActivationFunction(node->fusedActivationFunction());
cloned->weights_format(node->weights_format());
+ cloned->keep_num_dims(node->keep_num_dims());
}
return cloned;
}
return true;
}
+/**
+ * @brief Visitor checking that a multi-output node is followed only by nodes
+ *        of its dedicated "...Out" type
+ * @note visit() returns true for every node type that has no specific overload.
+ */
+class MultiOutNodeValidate final : public luci::CircleNodeVisitor<bool>
+{
+public:
+  MultiOutNodeValidate() {}
+
+private:
+  // true only when node has at least one successor and every successor is a T
+  template <class T> bool check(const luci::CircleNode *node)
+  {
+    auto const followers = loco::succs(node);
+    if (followers.empty())
+      return false;
+
+    for (auto const follower : followers)
+    {
+      if (dynamic_cast<const T *>(follower) == nullptr)
+        return false;
+    }
+    return true;
+  }
+
+public:
+  bool visit(const luci::CircleBidirectionalSequenceLSTM *node) final
+  {
+    return check<luci::CircleBidirectionalSequenceLSTMOut>(node);
+  }
+  bool visit(const luci::CircleCustom *node) final
+  {
+    return check<luci::CircleCustomOut>(node);
+  }
+  bool visit(const luci::CircleIf *node) final
+  {
+    return check<luci::CircleIfOut>(node);
+  }
+  bool visit(const luci::CircleNonMaxSuppressionV4 *node) final
+  {
+    return check<luci::CircleNonMaxSuppressionV4Out>(node);
+  }
+  bool visit(const luci::CircleNonMaxSuppressionV5 *node) final
+  {
+    return check<luci::CircleNonMaxSuppressionV5Out>(node);
+  }
+  bool visit(const luci::CircleSplit *node) final
+  {
+    return check<luci::CircleSplitOut>(node);
+  }
+  bool visit(const luci::CircleSplitV *node) final
+  {
+    return check<luci::CircleSplitVOut>(node);
+  }
+  bool visit(const luci::CircleTopKV2 *node) final
+  {
+    return check<luci::CircleTopKV2Out>(node);
+  }
+  bool visit(const luci::CircleUnique *node) final
+  {
+    return check<luci::CircleUniqueOut>(node);
+  }
+  bool visit(const luci::CircleUnpack *node) final
+  {
+    return check<luci::CircleUnpackOut>(node);
+  }
+  bool visit(const luci::CircleWhile *node) final
+  {
+    return check<luci::CircleWhileOut>(node);
+  }
+
+  // default true for other nodes
+  bool visit(const luci::CircleNode *) final { return true; }
+};
+
+/**
+ * @brief Check that every active multi-output node (such as CircleIf) is
+ *        followed only by its matching Out nodes (such as CircleIfOut).
+ * @return false at the first node that fails the check (logged at INFO level),
+ *         true when all active nodes pass
+ */
+bool validate_multi_outs(loco::Graph *g)
+{
+  LOGGER(l);
+
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto const cnode = loco::must_cast<luci::CircleNode *>(node);
+
+    MultiOutNodeValidate validator;
+    if (!cnode->accept(&validator))
+    {
+      INFO(l) << "Node: " << cnode->name() << ", " << (uint32_t)(cnode->opcode())
+              << " has invalid successor." << std::endl;
+
+      return false;
+    }
+  }
+
+  return true;
+}
+
class VirtualNodeDetector final : public luci::CircleNodeVisitor<bool>
{
public:
if (!validate_shape_dtype(g))
return false;
+ if (!validate_multi_outs(g))
+ return false;
+
// TODO add more validation
return true;
addread(TransposeConv_000)
addread(UnidirectionalSequenceLSTM_000)
addread(UnidirectionalSequenceLSTM_001)
+addread(UnidirectionalSequenceLSTM_002)
addread(Unique_000)
addread(Unique_001)
addread(Unique_002)
addwrite(TransposeConv_000)
addwrite(UnidirectionalSequenceLSTM_000)
addwrite(UnidirectionalSequenceLSTM_001)
+addwrite(UnidirectionalSequenceLSTM_002)
addwrite(Unique_000)
addwrite(Unique_001)
addwrite(Unique_002)
add_executable(mio_tflite280_validate example.cpp)
target_link_libraries(mio_tflite280_validate mio_tflite280)
-nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
-
-if(NOT TensorFlowGEMMLowpSource_FOUND)
- return()
-endif(NOT TensorFlowGEMMLowpSource_FOUND)
-
-add_library(mio_tflite280_inc INTERFACE)
-target_include_directories(mio_tflite280_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
-target_include_directories(mio_tflite280_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
-
file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
auto matches = matcher.matchEdge(is_constant, is_transpose);
while (!matches.empty())
{
- for (const auto match : matches)
+ for (const auto &match : matches)
{
auto constant_op = dynamic_cast<ops::ConstantOp *>(match.first);
auto transpose_op = dynamic_cast<ops::TransposeOp *>(match.second);
+#include <string>
int main()
{
Shape s{1, 2, 3};
Tensor in_t(s);
- NNModel model("nnmodel.params");
+ NNModel model(std::string("nnmodel.params"));
model.set_in(in_t);
model.doInference();
std::shared_ptr<Tensor> out_t = model.get_out();
+# NOTE find_package tries to use at least python3.8 as follows depending on platform version
+# Ubuntu18.04; explicitly installed python3.8 (default is python3.6)
+# Ubuntu20.04; default python3.8
+# Ubuntu22.04; default python3.10
+# refer https://github.com/Samsung/ONE/issues/9962
+find_package(PythonInterp 3.8 QUIET)
+find_package(PythonLibs 3.8 QUIET)
+
+# Skip one-cmds entirely when no suitable interpreter is available
+if(NOT PYTHONINTERP_FOUND)
+  message(STATUS "Build one-cmds: FALSE (Python3 is missing)")
+  return()
+endif()
+
+# Compare the full version so that the major number is taken into account too
+if(PYTHON_VERSION_STRING VERSION_LESS 3.8)
+  message(STATUS "Build one-cmds: FALSE (You need to install Python version 3.8 or higher)")
+  return()
+endif()
+
+# NOTE these files should not have extensions.
+# below code will remove extension when copy and install.
set(ONE_COMMAND_FILES
one-build
one-import
one-profile
one-infer
one-codegen
- one-prepare-venv
onecc
)
+# Select which one-prepare-venv script is added to ONE_COMMAND_FILES:
+# the .u2204 variant when ONE_UBUNTU_CODENAME_JAMMY is set, the default one otherwise.
+# TODO find better way for per-platform files
+if(ONE_UBUNTU_CODENAME_JAMMY)
+ # NOTE copy one-prepare-venv.u2204 as build/../one-prepare-venv
+ # and install build/../one-prepare-venv file
+ list(APPEND ONE_COMMAND_FILES one-prepare-venv.u2204)
+else()
+ list(APPEND ONE_COMMAND_FILES one-prepare-venv)
+endif()
+
# pytorch importer is an experimental feature, it is not used in default configuration
if(ENABLE_ONE_IMPORT_PYTORCH)
list(APPEND ONE_COMMAND_FILES one-import-pytorch)
set(ONE_COMMAND_FILE ${ONE_COMMAND})
set(ONE_COMMAND_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${ONE_COMMAND_FILE}")
- set(ONE_COMMAND_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_COMMAND_FILE}")
+ # strip extension from the name
+ get_filename_component(ONE_COMMNAD_FILE_NAME ${ONE_COMMAND} NAME_WE)
+ set(ONE_COMMAND_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_COMMNAD_FILE_NAME}")
set(ONE_COMMAND_TARGET "${ONE_COMMAND}_target")
add_custom_command(OUTPUT ${ONE_COMMAND_BIN}
add_custom_target(${ONE_COMMAND_TARGET} ALL DEPENDS ${ONE_COMMAND_BIN})
- install(FILES ${ONE_COMMAND}
+ install(FILES ${ONE_COMMAND_BIN}
PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_READ GROUP_EXECUTE
WORLD_READ WORLD_EXECUTE
set(ONE_UTILITY_FILES
one-build.template.cfg
onecc.template.cfg
- utils.py
onnx_legalizer.py
)
endforeach(ONE_UTILITY)
+# one-pack internally uses model2nnpkg tool
+# The script is taken from the nnpackage_tool directory of the source tree and
+# installed into bin as an executable named "model2nnpkg" (no .py extension).
+set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py")
+install(FILES ${MODEL2NNPKG}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION bin
+ RENAME "model2nnpkg")
+
# make python directory
set(ONE_PYTHON_FILES constant.py
+ export_constant.py
make_cmd.py
CfgRunner.py
OptionBuilder.py
TopologicalSortHelper.py
- WorkflowRunner.py)
+ WorkflowRunner.py
+ utils.py)
foreach(ONE_PYTHON_FILE IN ITEMS ${ONE_PYTHON_FILES})
endforeach(ONE_PYTHON_FILE)
+# Generate O1.cfg at build time by running export_constant.py with
+# "--constant O1 --format cfg", then install it into the optimization directory.
+set(CONSTANT_EXPORTING_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/${ONE_PYTHON_DIR}/export_constant.py")
+set(O1_OPTION "O1")
+set(O1_CFG_FILE "${O1_OPTION}.cfg")
+set(O1_CFG_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_PYTHON_DIR}/${O1_CFG_FILE}")
+
+# NOTE(review): only the exporting script is listed in DEPENDS. If the script
+# reads its values from another file (presumably constant.py - confirm), a change
+# there will not regenerate the cfg.
+add_custom_command(OUTPUT ${O1_CFG_FILE_BIN}
+ COMMAND ${PYTHON_EXECUTABLE} ${CONSTANT_EXPORTING_SCRIPT}
+ --constant ${O1_OPTION}
+ --format cfg
+ --output_path ${O1_CFG_FILE_BIN}
+ DEPENDS ${CONSTANT_EXPORTING_SCRIPT}
+ COMMENT "Generate ${O1_CFG_FILE}"
+)
+
+# ALL makes the cfg part of the default build so that it exists at install time
+add_custom_target("O1_cfg_target" ALL DEPENDS ${O1_CFG_FILE_BIN})
+
+# Installed as a plain data file: readable by everyone, writable by owner, not executable
+install(FILES ${O1_CFG_FILE_BIN}
+ PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION optimization)
+
set(ONE_DOCUMENT_FILES
how-to-use-one-commands.txt
how-to-prepare-virtualenv.txt
set(HELP_INFER_SRC src/help-infer.cpp)
set(DUMMY_PROFILE_SRC src/dummy-profile.cpp)
set(HELP_PROFILE_SRC src/help-profile.cpp)
+set(DUMMY_ENV_SRC src/dummyEnv-compile.cpp)
add_executable(dummy-compile ${DUMMY_DRIVER_SRC})
add_executable(help-compile ${HELP_DRIVER_SRC})
add_executable(help-infer ${HELP_INFER_SRC})
add_executable(dummy-profile ${DUMMY_PROFILE_SRC})
add_executable(help-profile ${HELP_PROFILE_SRC})
+add_executable(dummyEnv-compile ${DUMMY_ENV_SRC})
set(DUMMY_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/dummy-compile")
set(HELP_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/help-compile")
set(HELP_INFER "${CMAKE_CURRENT_BINARY_DIR}/help-infer")
set(DUMMY_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/dummy-profile")
set(HELP_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/help-profile")
+set(DUMMY_ENV "${CMAKE_CURRENT_BINARY_DIR}/dummyEnv-compile")
install(FILES ${DUMMY_DRIVER}
PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_READ GROUP_EXECUTE
WORLD_READ WORLD_EXECUTE
DESTINATION test)
+
+install(FILES ${DUMMY_ENV}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummyEnv-compile only tests its interface rather than its functionality.
+ *
+ * ./dummyEnv-compile ${DUMMY_OUTPUT}
+ */
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+/**
+ * Write "SPM_SIZE=<value of the SPM_SIZE environment variable>" to the file
+ * named by the single command line argument. The value is empty when the
+ * variable is not set.
+ *
+ * Returns EXIT_FAILURE when the argument count is wrong or when the output file
+ * cannot be opened or written; EXIT_SUCCESS otherwise.
+ */
+int main(int argc, char **argv)
+{
+  if (argc != 2)
+    return EXIT_FAILURE;
+
+  // Stays empty when SPM_SIZE is not present in the environment
+  std::string spm_size;
+  if (const char *env_p = std::getenv("SPM_SIZE"))
+    spm_size = env_p;
+
+  std::ofstream outfile(argv[1]);
+  // Do not report success when nothing can be written
+  if (!outfile.is_open())
+    return EXIT_FAILURE;
+
+  outfile << "SPM_SIZE=" << spm_size;
+
+  // close() flushes; a failed write or flush leaves the stream in a failed state
+  outfile.close();
+
+  return outfile.fail() ? EXIT_FAILURE : EXIT_SUCCESS;
+}
About
-----
-Last update: 2020-09-15
+Last update: 2023-01-30
This document explains about 'one-prepare-venv' command.
-'one-prepare-venv' will prepare python3.8 virtual environment with tensorflow-cpu
-version 2.3.0, recommanded 2.x version as of now, so that 'one-import-tf'
+'one-prepare-venv' will prepare python3 virtual environment with tensorflow-cpu
+version 2.8.0, recommended 2.x version as of now, so that 'one-import-tf'
command can execute properly.
-'one-prepare-venv' will also prepare onnx and onnx-tensorflow version 1.7.0 so
+'one-prepare-venv' will also prepare onnx and onnx-tensorflow version 1.10.0 so
that 'one-import-onnx' command can execute properly.
- fuse_instnorm: This will convert instance normalization related operators to
one InstanceNormalization operator that our onert provides for faster
execution.
+- fuse_prelu: This will fuse operators to PReLU operator
- fuse_preactivation_batchnorm: This fuses batch normalization operators of pre-activations to Conv operators.
- fuse_activation_function: This fuses Activation function to a preceding operator.
- fuse_mean_with_mean: This fuses two consecutive ReduceMean operations into one.
import os
import sys
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
# This suppression is applied only to `one-build`
parser = argparse.ArgumentParser(
description='command line tool to run ONE drivers in customized order')
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
- opt_name_list = _utils._get_optimization_list(get_name=True)
+ opt_name_list = oneutils.get_optimization_list(get_name=True)
opt_name_list = ['-' + s for s in opt_name_list]
if not opt_name_list:
opt_help_message = '(No available optimization options)'
args = parser.parse_args()
# print version
if args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return args
def _verify_arg(parser, args):
"""verify given arguments"""
# check if required arguments is given
- if not _utils._is_valid_attr(args, 'config'):
+ if not oneutils.is_valid_attr(args, 'config'):
parser.error('-C/--config argument is required')
# check if given optimization option exists
- opt_name_list = _utils._get_optimization_list(get_name=True)
- opt_name_list = [_utils._remove_prefix(s, 'O') for s in opt_name_list]
- if _utils._is_valid_attr(args, 'O'):
+ opt_name_list = oneutils.get_optimization_list(get_name=True)
+ opt_name_list = [oneutils.remove_prefix(s, 'O') for s in opt_name_list]
+ if oneutils.is_valid_attr(args, 'O'):
if ' ' in getattr(args, 'O'):
parser.error('Not allowed to have space in the optimization name')
if not getattr(args, 'O') in opt_name_list:
}[driver_name]
-def _parse_cfg(args):
+def parse_cfg(args):
config = configparser.ConfigParser()
config.optionxform = str
parsed = config.read(os.path.expanduser(getattr(args, 'config')))
# verify given optimization option file
def _verify_opt(args):
- if _utils._is_valid_attr(args, 'O'):
+ if oneutils.is_valid_attr(args, 'O'):
config = configparser.ConfigParser()
config.optionxform = str
opt_name_path_dic = dict(
- zip(_utils._get_optimization_list(get_name=True),
- _utils._get_optimization_list()))
+ zip(oneutils.get_optimization_list(get_name=True),
+ oneutils.get_optimization_list()))
parsed = config.read(opt_name_path_dic['O' + getattr(args, 'O')])
# check if given optimization option file exists
if not parsed:
_verify_arg(parser, args)
# parse configuration file
- config = _parse_cfg(args)
+ config = parse_cfg(args)
# verify configuration file
bin_dir = os.path.dirname(os.path.realpath(__file__))
- import_drivers_dict = _utils._detect_one_import_drivers(bin_dir)
+ import_drivers_dict = oneutils.detect_one_import_drivers(bin_dir)
transform_drivers = [
'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile',
'one-partition'
driver_name = _get_driver_name(section)
driver_path = os.path.join(dir_path, driver_name)
cmd = [driver_path, '--config', getattr(args, 'config'), '--section', section]
- if section == 'one-optimize' and _utils._is_valid_attr(args, 'O'):
+ if section == 'one-optimize' and oneutils.is_valid_attr(args, 'O'):
cmd += ['-O', getattr(args, 'O')]
- _utils._run(cmd)
+ oneutils.run(cmd)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
import sys
import shutil
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
parser = argparse.ArgumentParser(
description='command line tool for code generation', usage=codegen_usage)
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
# get backend list in the directory
backends_name = [ntpath.basename(f) for f in backends_list]
"""verify given arguments"""
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'backend'):
+ if not oneutils.is_valid_attr(args, 'backend'):
missing.append('-b/--backend')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
del argv[0]
# split by '--'
args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+ if len(args) == 0:
+ codegen_args = parser.parse_args(codegen_args)
# one-codegen has two interfaces
# 1. one-codegen [-h] [-v] [-C CONFIG] [-b BACKEND] [COMMANDS FOR BACKEND]
if len(args) == 1:
codegen_args = parser.parse_args(codegen_args)
# print version
if len(args) and codegen_args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return codegen_args, backend_args, unknown_args
args, backend_args, unknown_args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-codegen')
+ oneutils.parse_cfg(args.config, 'one-codegen', args)
# verify arguments
_verify_arg(parser, args)
if not codegen_path:
raise FileNotFoundError(backend_base + ' not found')
codegen_cmd = [codegen_path] + backend_args + unknown_args
- if _utils._is_valid_attr(args, 'command'):
+ if oneutils.is_valid_attr(args, 'command'):
codegen_cmd += getattr(args, 'command').split()
# run backend driver
- _utils._run(codegen_cmd, err_prefix=backend_base)
+ oneutils.run(codegen_cmd, err_prefix=backend_base)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
import subprocess
import sys
-import utils as _utils
+import onelib.utils as oneutils
def _get_parser():
dir_path = os.path.dirname(os.path.realpath(__file__))
# make cmd
cmd = [sys.executable, os.path.join(dir_path, _get_driver_name(args.driver))]
- if _utils._is_valid_attr(args, 'config'):
+ if oneutils.is_valid_attr(args, 'config'):
cmd.append('--config')
cmd.append(os.path.expanduser(args.config))
return_code = subprocess.call(cmd + unknown_args)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
import tempfile
import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
import generate_bcq_output_arrays as _bcq_info_gen
# TODO Find better way to suppress trackback on error
parser = argparse.ArgumentParser(
description='command line tool to convert TensorFlow with BCQ to circle')
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
## tf2tfliteV2 arguments
tf2tfliteV2_group = parser.add_argument_group('converter arguments')
"""verify given arguments"""
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'input_path'):
+ if not oneutils.is_valid_attr(args, 'input_path'):
missing.append('-i/--input_path')
- if not _utils._is_valid_attr(args, 'output_path'):
+ if not oneutils.is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
args = parser.parse_args()
# print version
if args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return args
"""make a command for running generate_bcq_metadata"""
cmd = [sys.executable, driver_path]
# input_path
- if _utils._is_valid_attr(args, 'input_path'):
+ if oneutils.is_valid_attr(args, 'input_path'):
cmd.append('--input_path')
cmd.append(os.path.expanduser(getattr(args, 'input_path')))
# output_path
- if _utils._is_valid_attr(args, 'output_path'):
+ if oneutils.is_valid_attr(args, 'output_path'):
cmd.append('--output_path')
cmd.append(os.path.expanduser(output_path))
# output_arrays
- if _utils._is_valid_attr(args, 'output_arrays'):
+ if oneutils.is_valid_attr(args, 'output_arrays'):
cmd.append('--output_arrays')
cmd.append(getattr(args, 'output_arrays'))
f.write((' '.join(generate_bcq_metadata_cmd) + '\n').encode())
# generate BCQ information metadata
- _utils._run(generate_bcq_metadata_cmd, logfile=f)
+ oneutils.run(generate_bcq_metadata_cmd, logfile=f)
# get output_arrays with BCQ
bcq_output_arrays = _bcq_info_gen.get_bcq_output_arrays(
f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
# convert tf to tflite
- _utils._run(tf2tfliteV2_cmd, logfile=f)
+ oneutils.run(tf2tfliteV2_cmd, logfile=f)
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
# convert tflite to circle
- _utils._run(tflite2circle_cmd, logfile=f)
+ oneutils.run(tflite2circle_cmd, logfile=f)
def main():
args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-import-bcq')
+ oneutils.parse_cfg(args.config, 'one-import-bcq', args)
# verify arguments
_verify_arg(parser, args)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
_onnx_legalizer_enabled = False
import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
parser = argparse.ArgumentParser(
description='command line tool to convert ONNX to circle')
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
## tf2tfliteV2 arguments
tf2tfliteV2_group = parser.add_argument_group('converter arguments')
"""verify given arguments"""
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'input_path'):
+ if not oneutils.is_valid_attr(args, 'input_path'):
missing.append('-i/--input_path')
- if not _utils._is_valid_attr(args, 'output_path'):
+ if not oneutils.is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
args = parser.parse_args()
# print version
if args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return args
with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
# save intermediate
- if _utils._is_valid_attr(args, 'save_intermediate'):
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
tmpdir = os.path.dirname(logfile_path)
# convert onnx to tf saved model
onnx_model = onnx.load(getattr(args, 'input_path'))
if _onnx_legalizer_enabled:
options = onnx_legalizer.LegalizeOptions
- options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn')
- options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm')
+ options.unroll_rnn = oneutils.is_valid_attr(args, 'unroll_rnn')
+ options.unroll_lstm = oneutils.is_valid_attr(args, 'unroll_lstm')
onnx_legalizer.legalize(onnx_model, options)
- if _utils._is_valid_attr(args, 'keep_io_order'):
+ if oneutils.is_valid_attr(args, 'keep_io_order'):
_remap_io_names(onnx_model)
- if _utils._is_valid_attr(args, 'save_intermediate'):
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
basename = os.path.basename(getattr(args, 'input_path'))
fixed_path = os.path.join(tmpdir,
os.path.splitext(basename)[0] + '~.onnx')
f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
# convert tf to tflite
- _utils._run(tf2tfliteV2_cmd, logfile=f)
+ oneutils.run(tf2tfliteV2_cmd, logfile=f)
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
# convert tflite to circle
- _utils._run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+ oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
def main():
args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-import-onnx')
+ oneutils.parse_cfg(args.config, 'one-import-onnx', args)
# verify arguments
_verify_arg(parser, args)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
import onnx_legalizer
import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
def get_driver_spec():
- return ("one-import-pytorch", _utils.DriverType.IMPORTER)
+ return ("one-import-pytorch", oneutils.DriverType.IMPORTER)
def _get_parser():
parser = argparse.ArgumentParser(
description='command line tool to convert PyTorch to Circle')
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
## converter arguments
converter_group = parser.add_argument_group('converter arguments')
"""verify given arguments"""
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'input_path'):
+ if not oneutils.is_valid_attr(args, 'input_path'):
missing.append('-i/--input_path')
- if not _utils._is_valid_attr(args, 'output_path'):
+ if not oneutils.is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
- if not _utils._is_valid_attr(args, 'input_shapes'):
+ if not oneutils.is_valid_attr(args, 'input_shapes'):
missing.append('-s/--input_shapes')
- if not _utils._is_valid_attr(args, 'input_types'):
+ if not oneutils.is_valid_attr(args, 'input_types'):
missing.append('-t/--input_types')
if len(missing):
args = parser.parse_args()
# print version
if args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return args
logfile_path = os.path.realpath(args.output_path) + '.log'
with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
# save intermediate
- if _utils._is_valid_attr(args, 'save_intermediate'):
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
tmpdir = os.path.dirname(logfile_path)
# convert pytorch to onnx model
input_path = getattr(args, 'input_path')
onnx_model = onnx.load(onnx_output_path)
options = onnx_legalizer.LegalizeOptions()
- options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn')
- options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm')
+ options.unroll_rnn = oneutils.is_valid_attr(args, 'unroll_rnn')
+ options.unroll_lstm = oneutils.is_valid_attr(args, 'unroll_lstm')
onnx_legalizer.legalize(onnx_model, options)
tf_savedmodel = onnx_tf.backend.prepare(onnx_model)
f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
# convert tf to tflite
- _utils._run(tf2tfliteV2_cmd, logfile=f)
+ oneutils.run(tf2tfliteV2_cmd, logfile=f)
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
# convert tflite to circle
- _utils._run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+ oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
def main():
args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-import-pytorch')
+ oneutils.parse_cfg(args.config, 'one-import-pytorch', args)
# verify arguments
_verify_arg(parser, args)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
import tempfile
import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
def get_driver_cfg_section():
parser = argparse.ArgumentParser(
description='command line tool to convert TensorFlow to circle')
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
## tf2tfliteV2 arguments
tf2tfliteV2_group = parser.add_argument_group('converter arguments')
action='store_true',
help='Save intermediate files to output folder')
+ # experimental options
+ parser.add_argument(
+ '--experimental_disable_batchmatmul_unfold',
+ action='store_true',
+ help='Experimental disable BatchMatMul unfold')
+
return parser
"""verify given arguments"""
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'input_path'):
+ if not oneutils.is_valid_attr(args, 'input_path'):
missing.append('-i/--input_path')
- if not _utils._is_valid_attr(args, 'output_path'):
+ if not oneutils.is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
args = parser.parse_args()
# print version
if args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return args
with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
# save intermediate
- if _utils._is_valid_attr(args, 'save_intermediate'):
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
tmpdir = os.path.dirname(logfile_path)
# make a command to convert from tf to tflite
tf2tfliteV2_path = os.path.join(dir_path, 'tf2tfliteV2.py')
f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
# convert tf to tflite
- _utils._run(tf2tfliteV2_cmd, logfile=f)
+ oneutils.run(tf2tfliteV2_cmd, logfile=f)
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
# convert tflite to circle
- _utils._run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+ oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
def main():
args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-import-tf')
+ oneutils.parse_cfg(args.config, 'one-import-tf', args)
# verify arguments
_verify_arg(parser, args)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
import sys
import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
parser = argparse.ArgumentParser(
description='command line tool to convert TensorFlow lite to circle')
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
## tflite2circle arguments
tflite2circle_group = parser.add_argument_group('converter arguments')
"""verify given arguments"""
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'input_path'):
+ if not oneutils.is_valid_attr(args, 'input_path'):
missing.append('-i/--input_path')
- if not _utils._is_valid_attr(args, 'output_path'):
+ if not oneutils.is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
args = parser.parse_args()
# print version
if args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return args
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
# convert tflite to circle
- _utils._run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+ oneutils.run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
def main():
args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-import-tflite')
+ oneutils.parse_cfg(args.config, 'one-import-tflite', args)
# verify arguments
_verify_arg(parser, args)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
import os
import sys
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
-def _get_backends_list():
- """
- [one hierarchy]
- one
- ├── backends
- ├── bin
- ├── doc
- ├── include
- ├── lib
- ├── optimization
- └── test
-
- The list where `one-infer` finds its backends
- - `bin` folder where `one-infer` exists
- - `backends` folder
-
- NOTE If there are backends of the same name in different places,
- the closer to the top in the list, the higher the priority.
- """
- dir_path = os.path.dirname(os.path.realpath(__file__))
- backend_set = set()
-
- # bin folder
- files = [f for f in glob.glob(dir_path + '/*-infer')]
- # backends folder
- files += [f for f in glob.glob(dir_path + '/../backends/**/*-infer', recursive=True)]
- # TODO find backends in `$PATH`
-
- backends_list = []
- for cand in files:
- base = ntpath.basename(cand)
- if (not base in backend_set) and os.path.isfile(cand) and os.access(
- cand, os.X_OK):
- backend_set.add(base)
- backends_list.append(cand)
-
- return backends_list
-
-
def _search_backend_driver(driver):
"""
[one hierarchy]
return None
-def _get_parser(backends_list):
- infer_usage = 'one-infer [-h] [-v] [-C CONFIG] [-d DRIVER | -b BACKEND] [--post-process POST_PROCESS] [--] [COMMANDS FOR BACKEND DRIVER]'
+def _get_parser():
+ infer_usage = 'one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [--post-process POST_PROCESS] [--] [COMMANDS FOR BACKEND DRIVER]'
+ infer_detail = """
+one-infer provides post-processing after invoking backend inference driver
+use python script and its arguments to '--post-process' argument as below
+one-infer -d dummy-infer --post-process "script.py arg1 arg2" -- [arguments for dummy-infer]
+"""
parser = argparse.ArgumentParser(
- description='command line tool to infer model', usage=infer_usage)
+ description='command line tool to infer model',
+ usage=infer_usage,
+ epilog=infer_detail,
+ formatter_class=argparse.RawTextHelpFormatter)
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
- # TODO: add tflite/onnx-infer driver to helper message when it is implemented
driver_help_message = 'backend inference driver name to execute'
parser.add_argument('-d', '--driver', type=str, help=driver_help_message)
- # get backend list in the directory
- backends_name = [ntpath.basename(f) for f in backends_list]
- if not backends_name:
- backends_name_message = '(There is no available backend drivers)'
- else:
- backends_name_message = '(available backend drivers: ' + ', '.join(
- backends_name) + ')'
- backend_help_message = 'backend name to use ' + backends_name_message
- parser.add_argument('-b', '--backend', type=str, help=backend_help_message)
-
- post_process_help_message = 'post processing script to convert I/O data to standard format'
+ post_process_help_message = 'post processing python script and arguments which can be used to convert I/O data to standard format'
parser.add_argument('--post-process', type=str, help=post_process_help_message)
return parser
def _verify_arg(parser, args):
"""verify given arguments"""
- # `-d/--driver` and `-b/--backend` are mutually exclusive arguments.
- if _utils._is_valid_attr(args, 'driver') and _utils._is_valid_attr(args, 'backend'):
- parser.error(
- '-d and -b options are mutually exclusive. Please use only one of them')
-
missing = []
- if not _utils._is_valid_attr(args, 'driver') and not _utils._is_valid_attr(
- args, 'backend'):
- missing.append('{-d/--driver | -b/--backend}')
+ if not oneutils.is_valid_attr(args, 'driver'):
+ missing.append('-d/--driver')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
# split by '--'
args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
- # one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [-b BACKEND] [--post-process POST_PROCESS] -- [COMMANDS FOR BACKEND DRIVER]
+ # one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [--post-process POST_PROCESS] -- [COMMANDS FOR BACKEND DRIVER]
if len(args):
infer_args = args[0]
infer_args = parser.parse_args(infer_args)
backend_args = backend_args if len(args) < 2 else args[1]
+ else:
+ infer_args = parser.parse_args(infer_args)
# print version
if len(args) and infer_args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return infer_args, backend_args
-def _get_executable(args, backends_list):
- driver = _utils._is_valid_attr(args, 'driver')
- if driver:
- executable = _search_backend_driver(driver)
- if executable:
- return executable
- else:
- raise FileNotFoundError(driver + ' not found')
+def _get_executable(args):
+ driver = oneutils.is_valid_attr(args, 'driver')
- if _utils._is_valid_attr(args, 'backend'):
- backend_base = getattr(args, 'backend') + '-infer'
- for cand in backends_list:
- if ntpath.basename(cand) == backend_base:
- return cand
- raise FileNotFoundError(backend_base + ' not found')
+ executable = _search_backend_driver(driver)
+ if executable:
+ return executable
+ else:
+ raise FileNotFoundError(driver + ' not found')
def main():
- # get backend list
- backends_list = _get_backends_list()
-
# parse arguments
- parser = _get_parser(backends_list)
+ parser = _get_parser()
args, backend_args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-infer')
+ oneutils.parse_cfg(args.config, 'one-infer', args)
# verify arguments
_verify_arg(parser, args)
# make a command to run given backend driver
- driver_path = _get_executable(args, backends_list)
+ driver_path = _get_executable(args)
infer_cmd = [driver_path] + backend_args
- if _utils._is_valid_attr(args, 'command'):
+ if oneutils.is_valid_attr(args, 'command'):
infer_cmd += getattr(args, 'command').split()
# run backend driver
- _utils._run(infer_cmd, err_prefix=ntpath.basename(driver_path))
+ oneutils.run(infer_cmd, err_prefix=ntpath.basename(driver_path))
# run post process script if it's given
- if _utils._is_valid_attr(args, 'post_process'):
+ if oneutils.is_valid_attr(args, 'post_process'):
# NOTE: the given python script will be executed by venv of ONE
python_path = sys.executable
post_process_command = [python_path] + getattr(args,
'post_process').strip().split(' ')
- _utils._run(post_process_command, err_prefix='one-infer')
+ oneutils.run(post_process_command, err_prefix='one-infer')
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
import sys
import configparser
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
+class InputOutputPath:
+ '''
+ Class that remembers the input circle file and output circle file of section k.
+
+ After calling enter_new_section(),
+ the output path of section k is used as the input path of section k+1.
+ '''
+
+ def __init__(self, initial_input_path: str):
+ self._first_step = True
+ self._input_path = initial_input_path
+ self._output_path = ''
+
+ def enter_new_section(self, section_output_path: str):
+ '''
+ Call this when starting a section
+ '''
+ if self._first_step == True:
+ self._output_path = section_output_path
+ else:
+ self._input_path = self._output_path
+ self._output_path = section_output_path
+
+ self._first_step = False
+
+ def input_path(self):
+ return self._input_path
+
+ def output_path(self):
+ return self._output_path
+
+
class CommentableConfigParser(configparser.ConfigParser):
"""
ConfigParser where comment can be stored
'Currently tflite and onnx models are supported',
usage=init_usage)
- _utils._add_default_arg_no_CS(parser)
+ oneutils.add_default_arg_no_CS(parser)
parser.add_argument(
'-i', '--input_path', type=str, help='full filepath of the input model file')
def _verify_arg(parser, args):
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'input_path'):
+ if not oneutils.is_valid_attr(args, 'input_path'):
missing.append('-i/--input_path')
- if not _utils._is_valid_attr(args, 'output_path'):
+ if not oneutils.is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
- if not _utils._is_valid_attr(args, 'backend'):
+ if not oneutils.is_valid_attr(args, 'backend'):
missing.append('-b/--backend')
- if _utils._is_valid_attr(args, 'model_type'):
+ if oneutils.is_valid_attr(args, 'model_type'):
# TODO Support model types other than onnx and tflite (e.g., TF)
if getattr(args, 'model_type') not in ['onnx', 'tflite']:
parser.error('Allowed value for --model_type: "onnx" or "tflite"')
- if _utils._is_valid_attr(args, 'nchw_to_nhwc_input_shape'):
- if not _utils._is_valid_attr(args, 'convert_nchw_to_nhwc'):
+ if oneutils.is_valid_attr(args, 'nchw_to_nhwc_input_shape'):
+ if not oneutils.is_valid_attr(args, 'convert_nchw_to_nhwc'):
missing.append('--convert_nchw_to_nhwc')
- if _utils._is_valid_attr(args, 'nchw_to_nhwc_output_shape'):
- if not _utils._is_valid_attr(args, 'convert_nchw_to_nhwc'):
+ if oneutils.is_valid_attr(args, 'nchw_to_nhwc_output_shape'):
+ if not oneutils.is_valid_attr(args, 'convert_nchw_to_nhwc'):
missing.append('--convert_nchw_to_nhwc')
if len(missing):
backend_args = backend_args if len(args) < 2 else args[1]
# print version
if len(args) and init_args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return init_args, backend_args
def _get_executable(args, backends_list):
- if _utils._is_valid_attr(args, 'backend'):
+ if oneutils.is_valid_attr(args, 'backend'):
backend_base = getattr(args, 'backend') + '-init'
for cand in backends_list:
if ntpath.basename(cand) == backend_base:
# TODO Support workflow format (https://github.com/Samsung/ONE/pull/9354)
-def _generate():
+def _generate(args, model_type: str, inout_path: InputOutputPath):
# generate cfg file
config = CommentableConfigParser()
+ model_dir = os.path.dirname(args.input_path)
+ model_name = os.path.basename(args.input_path).split('.')[0]
+
+ def _assert_section(section: str):
+ if not config.has_section(section):
+ raise RuntimeError(f'Cannot find section: {section}')
def _add_onecc_sections():
- pass # NYI
+ '''
+ This adds all sections
+ '''
+ config.add_section('onecc')
+ sections = [
+ f'one-import-{model_type}', 'one-optimize', 'one-quantize', 'one-codegen'
+ ]
+
+ for section in sections:
+ config['onecc'][section] = 'True'
+ # add an empty section in preparation for the next step
+ config.add_section(section)
def _gen_import():
- pass # NYI
+ section = f'one-import-{model_type}'
+ _assert_section(section)
+
+ output_path = os.path.join(model_dir, f'{model_name}.circle')
+ inout_path.enter_new_section(section_output_path=output_path)
+ config[section]['input_path'] = inout_path.input_path()
+ config[section]['output_path'] = inout_path.output_path()
def _gen_optimize():
- pass # NYI
+ section = 'one-optimize'
+ _assert_section(section)
+
+ output_path = os.path.join(model_dir, f'{model_name}.opt.circle')
+ inout_path.enter_new_section(section_output_path=output_path)
+ config[section]['input_path'] = inout_path.input_path()
+ config[section]['output_path'] = inout_path.output_path()
+
+ # TODO Add optimization options
def _gen_quantize():
- pass # NYI
+ section = 'one-quantize'
+ _assert_section(section)
+
+ output_path = os.path.join(model_dir, f'{model_name}.q.circle')
+ inout_path.enter_new_section(section_output_path=output_path)
+ config[section]['input_path'] = inout_path.input_path()
+ config[section]['output_path'] = inout_path.output_path()
def _gen_codegen():
- pass # NYI
+ section = 'one-codegen'
+ _assert_section(section)
+
+ # [backend]-init must provide default value for 'command'
+ config[section]['backend'] = args.backend
#
# NYI: one-profile, one-partition, one-pack, one-infer
config.write(f)
+def _get_model_type(parser, args):
+ if oneutils.is_valid_attr(args, 'model_type'):
+ return args.model_type
+
+ if oneutils.is_valid_attr(args, 'input_path'):
+ _, ext = os.path.splitext(args.input_path)
+
+ # ext would be, e.g., '.tflite' or '.onnx'.
+ # Note: when args.input_path does not have an extension, e.g., '/home/foo'
+ # ext after os.path.splitext() is '' and ''[1:] is still ''.
+ # TODO support tensorflow model
+ ext = ext[1:]
+ if ext in ["tflite", "onnx"]:
+ return ext
+ else:
+ parser.error(f'following file extensions are supported: ".onnx" ".tflite"')
+
+ parser.error(f'the following argument is required: --input_path')
+
+
def main():
# get backend list
backends_list = _get_backends_list()
# verify arguments
_verify_arg(parser, args)
+ model_type = _get_model_type(parser, args)
+ inout_path = InputOutputPath(args.input_path)
+ _generate(args, model_type, inout_path)
+
# make a command to run given backend driver
driver_path = _get_executable(args, backends_list)
init_cmd = [driver_path] + backend_args
# run backend driver
- _utils._run(init_cmd, err_prefix=ntpath.basename(driver_path))
-
- #TODO generate cfg file
+ oneutils.run(init_cmd, err_prefix=ntpath.basename(driver_path))
raise NotImplementedError("NYI")
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
import onelib.constant as _constant
import onelib.make_cmd as _make_cmd
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
parser = argparse.ArgumentParser(
description='command line tool to optimize circle model')
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
## utility arguments
utility_group = parser.add_argument_group('arguments for utility')
"""verify given arguments"""
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'input_path'):
+ if not oneutils.is_valid_attr(args, 'input_path'):
missing.append('-i/--input_path')
- if not _utils._is_valid_attr(args, 'output_path'):
+ if not oneutils.is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
args = parser.parse_args()
# print version
if args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return args
getattr(args, 'output_path'))
# verbose
- if _utils._is_valid_attr(args, 'verbose'):
+ if oneutils.is_valid_attr(args, 'verbose'):
circle2circle_cmd.append('--verbose')
- if _utils._is_valid_attr(args, 'change_outputs'):
+ if oneutils.is_valid_attr(args, 'change_outputs'):
circle2circle_cmd.append('--change_outputs')
circle2circle_cmd.append(getattr(args, 'change_outputs'))
f.write((' '.join(circle2circle_cmd) + '\n').encode())
# optimize
- _utils._run(circle2circle_cmd, err_prefix="circle2circle", logfile=f)
+ oneutils.run(circle2circle_cmd, err_prefix="circle2circle", logfile=f)
def _parse_opt(args):
- if _utils._is_valid_attr(args, 'O'):
+ if oneutils.is_valid_attr(args, 'O'):
opt_name_path_dic = dict(
- zip(_utils._get_optimization_list(get_name=True),
- _utils._get_optimization_list()))
+ zip(oneutils.get_optimization_list(get_name=True),
+ oneutils.get_optimization_list()))
config_path = opt_name_path_dic['O' + getattr(args, 'O')]
- _utils._parse_cfg_and_overwrite(config_path, 'one-optimize', args)
+ # group options do not overwrite existing args
+ oneutils.parse_cfg(config_path, 'one-optimize', args)
+# There are several ways to receive the optimization options:
+# - Individual option
+# 1. From command line
+# 2. From cfg file
+# - Group option
+# 3. From command line
+#
+# Their priority is as follows, since the options can be given simultaneously:
+# 1. Individual option from command line
+# 2. Individual option from cfg file
+# 3. Group option from command line
+#
+# To honor this priority, options with higher priority must be parsed first.
+#
+# DO NOT MODIFY the order of the function calls below.
+#
+# NOTE. All optimization options are assumed to be 'store_true' flags only.
+# NOTE. Group option from cfg file (`include` in `[onecc]` section) is passed
+# as a command line argument.
def main():
# parse arguments
parser = _get_parser()
args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-optimize')
+ oneutils.parse_cfg(args.config, 'one-optimize', args)
# parse optimization file
# NOTE if there is a `one-optimize` section in above configuration file as well,
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
import os
import sys
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
parser = argparse.ArgumentParser(
description='command line tool to package circle and metadata into nnpackage')
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
## model2nnpkg arguments
model2nnpkg_group = parser.add_argument_group('arguments for packaging')
"""verify given arguments"""
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'input_path'):
+ if not oneutils.is_valid_attr(args, 'input_path'):
missing.append('-i/--input_path')
- if not _utils._is_valid_attr(args, 'output_path'):
+ if not oneutils.is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
args = parser.parse_args()
# print version
if args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return args
cmd = [os.path.expanduser(driver_path)]
cmd.append('-o')
cmd.append(os.path.expanduser(output_path))
+ cmd.append('-m')
cmd.append(os.path.expanduser(input_path))
return cmd
with open(logfile_path, 'wb') as f:
# make a command to package circle model and metadata into nnpackage
- model2nnpkg_path = os.path.join(dir_path, 'model2nnpkg.sh')
+ model2nnpkg_path = os.path.join(dir_path, 'model2nnpkg')
model2nnpkg_cmd = _make_model2nnpkg_cmd(model2nnpkg_path,
getattr(args, 'input_path'),
getattr(args, 'output_path'))
f.write((' '.join(model2nnpkg_cmd) + '\n').encode())
# convert tflite to circle
- _utils._run(model2nnpkg_cmd, err_prefix="model2nnpkg.sh", logfile=f)
+ oneutils.run(model2nnpkg_cmd, err_prefix="model2nnpkg", logfile=f)
def main():
args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-pack')
+ oneutils.parse_cfg(args.config, 'one-pack', args)
# verify arguments
_verify_arg(parser, args)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
import os
import sys
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
parser = argparse.ArgumentParser(
description='command line tool to partition circle model by multiple backends')
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
parser.add_argument(
'--backends', type=str, help='backends in CSV to use for partitioning')
args = parser.parse_args()
# print version
if args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return args
"""verify given arguments"""
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'part_file'):
+ if not oneutils.is_valid_attr(args, 'part_file'):
missing.append('part_file')
- if not _utils._is_valid_attr(args, 'input_file'):
+ if not oneutils.is_valid_attr(args, 'input_file'):
missing.append('input_file')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
cmd = [os.path.expanduser(circle_partitioner_path)]
- if _utils._is_valid_attr(args, 'backends'):
+ if oneutils.is_valid_attr(args, 'backends'):
cmd.append('--backends')
cmd.append(getattr(args, 'backends'))
- if _utils._is_valid_attr(args, 'default'):
+ if oneutils.is_valid_attr(args, 'default'):
cmd.append('--default')
cmd.append(getattr(args, 'default'))
- if _utils._is_valid_attr(args, 'work_path'):
+ if oneutils.is_valid_attr(args, 'work_path'):
cmd.append('--work_path')
cmd.append(getattr(args, 'work_path'))
f.write((' '.join(cmd) + '\n').encode())
# run circle-partitoner
- _utils._run(cmd, err_prefix='circle-partitioner', logfile=f)
+ oneutils.run(cmd, err_prefix='circle-partitioner', logfile=f)
def main():
args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-partition')
-
- if _utils._is_valid_attr(args, 'config'):
- config_path = getattr(args, 'config')
- _utils._parse_cfg_and_overwrite(config_path, 'one-partition', args)
+ oneutils.parse_cfg(args.config, 'one-partition', args)
# verify arguments
_verify_arg(parser, args)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
VER_ONNX=1.11.0
VER_ONNXRUNTIME=1.11.0
VER_ONNX_TF=1.10.0
+VER_PYDOT=1.4.2
# Install tensorflow
PIP_TRUSTED_HOST="--trusted-host pypi.org "
PIP_TRUSTED_HOST+="--trusted-host pypi.python.org "
-PIP_TRUSTED_HOST+="--trusted-host files.pythonhost.org "
+PIP_TRUSTED_HOST+="--trusted-host files.pythonhosted.org "
PIP_TRUSTED_HOST+="--trusted-host download.pytorch.org "
PIP_TIMEOUT="--default-timeout=1000 "
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow
# TODO remove version fix, https://github.com/Samsung/ONE/issues/9240
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability==0.16.0
+# TODO remove version fix, https://github.com/Samsung/ONE/issues/10481
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_addons==0.16.1
# Install PyTorch and ONNX related
# NOTE set ONE_PREPVENV_TORCH_STABLE to override 'torch_stable.html' URL.
# NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051
# TODO remove this when issue is resolved
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade protobuf==3.20.1
+
+# Install pydot for visq
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install pydot==${VER_PYDOT}
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+# Directory that contains this script; the virtual environment is created in
+# the `venv` subdirectory next to it.
+DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+VENV_ACTIVATE="${DRIVER_PATH}/venv/bin/activate"
+# NOTE please use venv's python instead of python after `source activation`.
+# This script is called by debian maintainer script, i.e. `postinst`.
+# Since debian maintainer script is called with sudo, `source activation` is ignored.
+VENV_PYTHON="${DRIVER_PATH}/venv/bin/python"
+
+# Paths derived from DRIVER_PATH are quoted everywhere below so that an
+# install directory containing whitespace does not break the checks or the
+# pip invocations.
+if [ ! -f "${VENV_ACTIVATE}" ]; then
+  # Create python virtual environment
+  python3 -m venv "${DRIVER_PATH}/venv"
+fi
+
+# NOTE version
+# - https://github.com/onnx/onnx/blob/master/docs/Versioning.md
+# - https://github.com/onnx/onnx-tensorflow/blob/master/Versioning.md
+
+VER_TENSORFLOW=2.10.1
+VER_ONNX=1.12.0
+VER_ONNXRUNTIME=1.12.1
+VER_ONNX_TF=1.10.0
+VER_PYDOT=1.4.2
+
+# Common pip options used by every install command below
+
+PIP_TRUSTED_HOST="--trusted-host pypi.org "
+PIP_TRUSTED_HOST+="--trusted-host pypi.python.org "
+PIP_TRUSTED_HOST+="--trusted-host files.pythonhosted.org "
+PIP_TRUSTED_HOST+="--trusted-host download.pytorch.org "
+
+PIP_TIMEOUT="--default-timeout=1000 "
+
+# PIP_OPTIONS holds several space-separated options, so it must be expanded
+# UNQUOTED at its use sites to be split into separate arguments.
+PIP_OPTIONS="${PIP_TIMEOUT} ${PIP_TRUSTED_HOST}"
+
+# NOTE $ONE_PREPVENV_PIP_OPTION is to provide additional PIP options
+# such as certificate file behind firewall
+# ex) ONE_PREPVENV_PIP_OPTION="--cert SomePrivateCertificate.crt" ./one-prepare-venv
+if [[ -n "${ONE_PREPVENV_PIP_OPTION}" ]]; then
+  PIP_OPTIONS+=" ${ONE_PREPVENV_PIP_OPTION} "
+fi
+
+# Install tensorflow
+"${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install --upgrade pip setuptools
+if [ -n "${EXT_TENSORFLOW_WHL}" ]; then
+  # EXT_TENSORFLOW_WHL is left unquoted so that callers who pass more than one
+  # space-separated argument through it keep working.
+  "${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install ${EXT_TENSORFLOW_WHL}
+else
+  "${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
+fi
+"${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install Pillow
+# NOTE(review): tensorflow_probability is not version-pinned here, unlike the
+# other framework packages in this script - confirm this is intended.
+"${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install tensorflow_probability
+
+# Install PyTorch and ONNX related
+# NOTE set ONE_PREPVENV_TORCH_STABLE to override 'torch_stable.html' URL.
+# torch_stable.html points to download URL of torch wheel file(s)
+# but sometimes the server gets unstable, especially from in-house CI.
+TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html"
+if [[ -n "${ONE_PREPVENV_TORCH_STABLE}" ]]; then
+  TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}"
+fi
+# TODO remove torch message
+echo "Torch from '${ONE_PREPVENV_TORCH_STABLE}' -> '${TORCH_STABLE_URL}'"
+"${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install torch==1.13.1+cpu -f "${TORCH_STABLE_URL}"
+
+"${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX}
+
+"${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install onnxruntime==${VER_ONNXRUNTIME}
+
+# Provide install of custom onnx-tf
+if [ -n "${EXT_ONNX_TF_WHL}" ]; then
+  # Left unquoted for the same reason as EXT_TENSORFLOW_WHL above.
+  "${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install ${EXT_ONNX_TF_WHL}
+else
+  "${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF}
+fi
+
+# NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051
+# TODO remove this when issue is resolved
+"${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install --upgrade protobuf==3.19.6
+
+# Install pydot for visq
+"${VENV_PYTHON}" -m pip ${PIP_OPTIONS} install pydot==${VER_PYDOT}
import os
import sys
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
parser = argparse.ArgumentParser(
description='command line tool for profiling backend model', usage=profile_usage)
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
# get backend list in the directory
backends_name = [ntpath.basename(f) for f in backends_list]
"""verify given arguments"""
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'backend'):
+ if not oneutils.is_valid_attr(args, 'backend'):
missing.append('-b/--backend')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
del argv[0]
# split by '--'
args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+ if len(args) == 0:
+ profile_args = parser.parse_args(profile_args)
# one-profile has two interfaces
# 1. one-profile [-h] [-v] [-C CONFIG] [-b BACKEND] [COMMANDS FOR BACKEND]
if len(args) == 1:
profile_args = parser.parse_args(profile_args)
# print version
if len(args) and profile_args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return profile_args, backend_args, unknown_args
args, backend_args, unknown_args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-profile')
+ oneutils.parse_cfg(args.config, 'one-profile', args)
# verify arguments
_verify_arg(parser, args)
if not profile_path:
raise FileNotFoundError(backend_base + ' not found')
profile_cmd = [profile_path] + backend_args + unknown_args
- if _utils._is_valid_attr(args, 'command'):
+ if oneutils.is_valid_attr(args, 'command'):
profile_cmd += getattr(args, 'command').split()
# run backend driver
- _utils._run(profile_cmd, err_prefix=backend_base)
+ oneutils.run(profile_cmd, err_prefix=backend_base)
if __name__ == '__main__':
import tempfile
import json
-import utils as _utils
-from utils import Command
+import onelib.utils as oneutils
+from onelib.Command import Command
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
parser = argparse.ArgumentParser(
description='command line tool to quantize circle model')
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
# input and output path.
parser.add_argument(
def _set_default_values(args):
- if not _utils._is_valid_attr(args, 'input_model_dtype') and not _utils._is_valid_attr(
- args, 'input_dtype'):
+ if not oneutils.is_valid_attr(args,
+ 'input_model_dtype') and not oneutils.is_valid_attr(
+ args, 'input_dtype'):
setattr(args, 'input_model_dtype', 'float32')
- if not _utils._is_valid_attr(args, 'quantized_dtype'):
+ if not oneutils.is_valid_attr(args, 'quantized_dtype'):
setattr(args, 'quantized_dtype', 'uint8')
- if _utils._is_valid_attr(args, 'quant_config'):
+ if oneutils.is_valid_attr(args, 'quant_config'):
# Get quantized_dtype from qconfig file
try:
with open(getattr(args, 'quant_config')) as f:
except json.decoder.JSONDecodeError:
print('Failed to decode ' + getattr(args, 'quant_config') +
'. Please check it is a json file.')
- if not _utils._is_valid_attr(args, 'granularity'):
+ if not oneutils.is_valid_attr(args, 'granularity'):
setattr(args, 'granularity', 'layer')
- if _utils._is_valid_attr(args, 'quant_config'):
+ if oneutils.is_valid_attr(args, 'quant_config'):
# Get granularity from qconfig file
try:
with open(getattr(args, 'quant_config')) as f:
except json.decoder.JSONDecodeError:
print('Failed to decode ' + getattr(args, 'quant_config') +
'. Please check it is a json file.')
- if not _utils._is_valid_attr(args, 'mode'):
+ if not oneutils.is_valid_attr(args, 'mode'):
setattr(args, 'mode', 'percentile')
- if not _utils._is_valid_attr(args, 'min_percentile'):
+ if not oneutils.is_valid_attr(args, 'min_percentile'):
setattr(args, 'min_percentile', '1.0')
- if not _utils._is_valid_attr(args, 'max_percentile'):
+ if not oneutils.is_valid_attr(args, 'max_percentile'):
setattr(args, 'max_percentile', '99.0')
"""verify given arguments"""
# check if required arguments is given
missing = []
- if not _utils._is_valid_attr(args, 'input_path'):
+ if not oneutils.is_valid_attr(args, 'input_path'):
missing.append('-i/--input_path')
- if not _utils._is_valid_attr(args, 'output_path'):
+ if not oneutils.is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
- if _utils._is_valid_attr(args, 'force_quantparam'):
- if not _utils._is_valid_attr(args, 'tensor_name'):
+ if oneutils.is_valid_attr(args, 'force_quantparam'):
+ if not oneutils.is_valid_attr(args, 'tensor_name'):
missing.append('--tensor_name')
- if not _utils._is_valid_attr(args, 'scale'):
+ if not oneutils.is_valid_attr(args, 'scale'):
missing.append('--scale')
- if not _utils._is_valid_attr(args, 'zero_point'):
+ if not oneutils.is_valid_attr(args, 'zero_point'):
missing.append('--zero_point')
- if _utils._is_valid_attr(args, 'copy_quantparam'):
- if not _utils._is_valid_attr(args, 'src_tensor_name'):
+ if oneutils.is_valid_attr(args, 'copy_quantparam'):
+ if not oneutils.is_valid_attr(args, 'src_tensor_name'):
missing.append('--src_tensor_name')
- if not _utils._is_valid_attr(args, 'dst_tensor_name'):
+ if not oneutils.is_valid_attr(args, 'dst_tensor_name'):
missing.append('--dst_tensor_name')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
- if _utils._is_valid_attr(args, 'force_quantparam'):
+ if oneutils.is_valid_attr(args, 'force_quantparam'):
tensors = getattr(args, 'tensor_name')
scales = getattr(args, 'scale')
zerops = getattr(args, 'zero_point')
if len(tensors) != len(scales) or len(tensors) != len(zerops):
parser.error(
'The same number of tensor_name, scale, and zero_point should be given.')
- if _utils._is_valid_attr(args, 'copy_quantparam'):
+ if oneutils.is_valid_attr(args, 'copy_quantparam'):
src_tensors = getattr(args, 'src_tensor_name')
dst_tensors = getattr(args, 'dst_tensor_name')
if len(src_tensors) != len(dst_tensors):
args = parser.parse_args()
# print version
if args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return args
def _quantize(args):
- if _utils._is_valid_attr(args, 'force_quantparam'):
+ if oneutils.is_valid_attr(args, 'force_quantparam'):
# write quantization parameters
_write_qparam(args)
return
- if _utils._is_valid_attr(args, 'copy_quantparam'):
+ if oneutils.is_valid_attr(args, 'copy_quantparam'):
# copy quantization parameters
_copy_qparam(args)
return
- if _utils._is_valid_attr(args, 'fake_quantize'):
+ if oneutils.is_valid_attr(args, 'fake_quantize'):
# fake-quantize model
_fake_quantize(args)
return
logfile_path = os.path.realpath(args.output_path) + '.log'
with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
- if _utils._is_valid_attr(args, 'save_intermediate'):
+ if oneutils.is_valid_attr(args, 'save_intermediate'):
tmpdir = os.path.dirname(logfile_path)
# get driver path
circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
## make a command to quantize and dequantize the weights of the model
circle_quantizer_cmd = [circle_quantizer_path]
# verbose
- if _utils._is_valid_attr(args, 'verbose'):
+ if oneutils.is_valid_attr(args, 'verbose'):
circle_quantizer_cmd.append('--verbose')
# quantize_dequantize_weights
circle_quantizer_cmd.append('--quantize_dequantize_weights')
# Use input_model_dtype if it exists. Use input_dtype otherwise.
- if _utils._is_valid_attr(args, 'input_model_dtype'):
+ if oneutils.is_valid_attr(args, 'input_model_dtype'):
circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
- elif _utils._is_valid_attr(args, 'input_dtype'):
+ elif oneutils.is_valid_attr(args, 'input_dtype'):
circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
- if _utils._is_valid_attr(args, 'quantized_dtype'):
+ if oneutils.is_valid_attr(args, 'quantized_dtype'):
circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
- if _utils._is_valid_attr(args, 'granularity'):
+ if oneutils.is_valid_attr(args, 'granularity'):
circle_quantizer_cmd.append(getattr(args, 'granularity'))
- if _utils._is_valid_attr(args, 'quant_config'):
+ if oneutils.is_valid_attr(args, 'quant_config'):
# NOTE --config conflicts with --config option in onecc, so
# we use quant_config for one-quantize
circle_quantizer_cmd.append('--config')
circle_quantizer_cmd.append(getattr(args, 'quant_config'))
# input and output path
- if _utils._is_valid_attr(args, 'input_path'):
+ if oneutils.is_valid_attr(args, 'input_path'):
circle_quantizer_cmd.append(getattr(args, 'input_path'))
tmp_weights_fake_quant_path = os.path.join(
tmpdir,
args.input_path))[0]) + '.weights_fake_quant.circle'
circle_quantizer_cmd.append(tmp_weights_fake_quant_path)
# profiling
- if _utils._is_valid_attr(args, 'generate_profile_data'):
+ if oneutils.is_valid_attr(args, 'generate_profile_data'):
circle_quantizer_cmd.append('--generate_profile_data')
f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
# run circle-quantizer
- _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+ oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
tmp_minmax_recorded_path = os.path.join(
tmpdir,
## make a second command to quantize the model using the embedded information
circle_quantizer_cmd = [circle_quantizer_path]
# verbose
- if _utils._is_valid_attr(args, 'verbose'):
+ if oneutils.is_valid_attr(args, 'verbose'):
circle_quantizer_cmd.append('--verbose')
# quantize_dequantize_weights
circle_quantizer_cmd.append('--quantize_with_minmax')
# Use input_model_dtype if it exists. Use input_dtype otherwise.
- if _utils._is_valid_attr(args, 'input_model_dtype'):
+ if oneutils.is_valid_attr(args, 'input_model_dtype'):
circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
- elif _utils._is_valid_attr(args, 'input_dtype'):
+ elif oneutils.is_valid_attr(args, 'input_dtype'):
circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
- if _utils._is_valid_attr(args, 'quantized_dtype'):
+ if oneutils.is_valid_attr(args, 'quantized_dtype'):
circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
- if _utils._is_valid_attr(args, 'granularity'):
+ if oneutils.is_valid_attr(args, 'granularity'):
circle_quantizer_cmd.append(getattr(args, 'granularity'))
- if _utils._is_valid_attr(args, 'TF-style_maxpool'):
+ if oneutils.is_valid_attr(args, 'TF-style_maxpool'):
circle_quantizer_cmd.append('--TF-style_maxpool')
- if _utils._is_valid_attr(args, 'input_type'):
+ if oneutils.is_valid_attr(args, 'input_type'):
circle_quantizer_cmd.append('--input_type')
circle_quantizer_cmd.append(getattr(args, 'input_type'))
- if _utils._is_valid_attr(args, 'output_type'):
+ if oneutils.is_valid_attr(args, 'output_type'):
circle_quantizer_cmd.append('--output_type')
circle_quantizer_cmd.append(getattr(args, 'output_type'))
- if _utils._is_valid_attr(args, 'quant_config'):
+ if oneutils.is_valid_attr(args, 'quant_config'):
# NOTE --config conflicts with --config option in onecc, so
# we use quant_config for one-quantize
circle_quantizer_cmd.append('--config')
circle_quantizer_cmd.append(getattr(args, 'quant_config'))
# input and output path
circle_quantizer_cmd.append(tmp_minmax_recorded_path)
- if _utils._is_valid_attr(args, 'output_path'):
+ if oneutils.is_valid_attr(args, 'output_path'):
circle_quantizer_cmd.append(getattr(args, 'output_path'))
# profiling
- if _utils._is_valid_attr(args, 'generate_profile_data'):
+ if oneutils.is_valid_attr(args, 'generate_profile_data'):
circle_quantizer_cmd.append('--generate_profile_data')
f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
# run circle-quantizer
- _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+ oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
# evaluate
- if _utils._is_valid_attr(args, 'evaluate_result'):
+ if oneutils.is_valid_attr(args, 'evaluate_result'):
circle_eval_diff_path = os.path.join(dir_path, 'circle-eval-diff')
quant_model = ""
- if _utils._is_valid_attr(args, 'output_path'):
+ if oneutils.is_valid_attr(args, 'output_path'):
quant_model = getattr(args, 'output_path')
tmp_fake_quant_model = os.path.join(
tmpdir,
# make a command to write qparams to the tensors
circle_quantizer_cmd = [circle_quantizer_path]
# verbose
- if _utils._is_valid_attr(args, 'verbose'):
+ if oneutils.is_valid_attr(args, 'verbose'):
circle_quantizer_cmd.append('--verbose')
- if _utils._is_valid_attr(args, 'tensor_name'):
+ if oneutils.is_valid_attr(args, 'tensor_name'):
tensor_name = getattr(args, 'tensor_name')
- if _utils._is_valid_attr(args, 'scale'):
+ if oneutils.is_valid_attr(args, 'scale'):
scale = getattr(args, 'scale')
- if _utils._is_valid_attr(args, 'zero_point'):
+ if oneutils.is_valid_attr(args, 'zero_point'):
zero_point = getattr(args, 'zero_point')
for (t, s, zp) in zip(tensor_name, scale, zero_point):
circle_quantizer_cmd.append('--force_quantparam')
circle_quantizer_cmd.append(str(s))
circle_quantizer_cmd.append(str(zp))
# input and output path
- if _utils._is_valid_attr(args, 'input_path'):
+ if oneutils.is_valid_attr(args, 'input_path'):
circle_quantizer_cmd.append(getattr(args, 'input_path'))
- if _utils._is_valid_attr(args, 'output_path'):
+ if oneutils.is_valid_attr(args, 'output_path'):
circle_quantizer_cmd.append(getattr(args, 'output_path'))
f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
# run circle-quantizer
- _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+ oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
def _copy_qparam(args):
# make a command to write qparams to the tensors
circle_quantizer_cmd = [circle_quantizer_path]
# verbose
- if _utils._is_valid_attr(args, 'verbose'):
+ if oneutils.is_valid_attr(args, 'verbose'):
circle_quantizer_cmd.append('--verbose')
- if _utils._is_valid_attr(args, 'src_tensor_name'):
+ if oneutils.is_valid_attr(args, 'src_tensor_name'):
src_tensor_name = getattr(args, 'src_tensor_name')
- if _utils._is_valid_attr(args, 'dst_tensor_name'):
+ if oneutils.is_valid_attr(args, 'dst_tensor_name'):
dst_tensor_name = getattr(args, 'dst_tensor_name')
for (src, dst) in zip(src_tensor_name, dst_tensor_name):
circle_quantizer_cmd.append('--copy_quantparam')
circle_quantizer_cmd.append(src)
circle_quantizer_cmd.append(dst)
# input and output path
- if _utils._is_valid_attr(args, 'input_path'):
+ if oneutils.is_valid_attr(args, 'input_path'):
circle_quantizer_cmd.append(getattr(args, 'input_path'))
- if _utils._is_valid_attr(args, 'output_path'):
+ if oneutils.is_valid_attr(args, 'output_path'):
circle_quantizer_cmd.append(getattr(args, 'output_path'))
f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
# run circle-quantizer
- _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+ oneutils.run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
def _fake_quantize(args):
args = _parse_arg(parser)
# parse configuration file
- _utils._parse_cfg(args, 'one-quantize')
+ oneutils.parse_cfg(args.config, 'one-quantize', args)
# set default values
_set_default_values(args)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
from onelib.CfgRunner import CfgRunner
from onelib.WorkflowRunner import WorkflowRunner
-import utils as _utils
+import onelib.utils as oneutils
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
dir_path = os.path.dirname(os.path.realpath(__file__))
driver_path = os.path.join(dir_path, driver_name)
cmd = [driver_path] + options
- _utils._run(cmd)
+ oneutils.run(cmd)
def _check_subtool_exists():
onecc_desc = 'Run ONE driver via several commands or configuration file'
parser = argparse.ArgumentParser(description=onecc_desc, usage=onecc_usage)
- _utils._add_default_arg(parser)
+ oneutils.add_default_arg(parser)
- opt_name_list = _utils._get_optimization_list(get_name=True)
+ opt_name_list = oneutils.get_optimization_list(get_name=True)
opt_name_list = ['-' + s for s in opt_name_list]
if not opt_name_list:
opt_help_message = '(No available optimization options)'
args = parser.parse_args()
# print version
if args.version:
- _utils._print_version_and_exit(__file__)
+ oneutils.print_version_and_exit(__file__)
return args
def _verify_arg(parser, args):
"""verify given arguments"""
# check if required arguments is given
- if not _utils._is_valid_attr(args, 'config') and not _utils._is_valid_attr(
+ if not oneutils.is_valid_attr(args, 'config') and not oneutils.is_valid_attr(
args, 'workflow'):
parser.error('-C/--config or -W/--workflow argument is required')
# check if given optimization option exists
- opt_name_list = _utils._get_optimization_list(get_name=True)
- opt_name_list = [_utils._remove_prefix(s, 'O') for s in opt_name_list]
- if _utils._is_valid_attr(args, 'O'):
+ opt_name_list = oneutils.get_optimization_list(get_name=True)
+ opt_name_list = [oneutils.remove_prefix(s, 'O') for s in opt_name_list]
+ if oneutils.is_valid_attr(args, 'O'):
if ' ' in getattr(args, 'O'):
parser.error('Not allowed to have space in the optimization name')
if not getattr(args, 'O') in opt_name_list:
_verify_arg(parser, args)
bin_dir = os.path.dirname(os.path.realpath(__file__))
- if _utils._is_valid_attr(args, 'config'):
+ if oneutils.is_valid_attr(args, 'config'):
runner = CfgRunner(args.config)
runner.detect_import_drivers(bin_dir)
- if _utils._is_valid_attr(args, 'O'):
+ if oneutils.is_valid_attr(args, 'O'):
runner.add_opt(getattr(args, 'O'))
runner.run(bin_dir)
- elif _utils._is_valid_attr(args, 'workflow'):
+ elif oneutils.is_valid_attr(args, 'workflow'):
runner = WorkflowRunner(args.workflow)
runner.run(bin_dir)
if __name__ == '__main__':
- _utils._safemain(main, __file__)
+ oneutils.safemain(main, __file__)
+; set environment variables
+[Environment]
+ONECC_ENV="ONECC"
+
; To activate a step (or task),
; set True for the step in [onecc] section and fill options in the corresponding section
[onecc]
one-profile=False
; infer
one-infer=False
+; group option
+; multiple group options are allowed
+include=O1
+# include=O1 O2 OMY_OPT
[one-import-tf]
# mandatory
import os
import warnings
-import utils as oneutils
+import onelib.utils as oneutils
def _simple_warning(message, category, filename, lineno, file=None, line=None):
self.import_drivers = [
'one-import-bcq', 'one-import-onnx', 'one-import-tf', 'one-import-tflite'
]
+ # parse group option
+ GROUP_OPTION_KEY = 'include'
+ if self.cfgparser.has_option('onecc', GROUP_OPTION_KEY):
+ groups = self.cfgparser['onecc'][GROUP_OPTION_KEY].split()
+ for o in groups:
+ if o == 'O' or not o.startswith('O'):
+ raise ValueError('Invalid group option')
+ # add_opt expects the group name without its leading 'O'
+ self.add_opt(o[1:])
def _verify_cfg(self, cfgparser):
if not cfgparser.has_section('onecc'):
# make option names case sensitive
self.optparser.optionxform = str
opt_book = dict(
- zip(oneutils._get_optimization_list(get_name=True),
- oneutils._get_optimization_list()))
+ zip(oneutils.get_optimization_list(get_name=True),
+ oneutils.get_optimization_list()))
parsed = self.optparser.read(opt_book['O' + opt])
if not parsed:
raise FileNotFoundError('Not found given optimization configuration file')
self.opt = opt
def detect_import_drivers(self, dir):
- self.import_drivers = list(oneutils._detect_one_import_drivers(dir).keys())
+ self.import_drivers = list(oneutils.detect_one_import_drivers(dir).keys())
def run(self, working_dir, verbose=False):
+ # set environment
+ CFG_ENV_SECTION = 'Environment'
+ if self.cfgparser.has_section(CFG_ENV_SECTION):
+ for key in self.cfgparser[CFG_ENV_SECTION]:
+ os.environ[key] = self.cfgparser[CFG_ENV_SECTION][key]
+
section_to_run = []
for d in self.import_drivers + self.driver_sequence:
if self._is_available(d):
options.append('--verbose')
driver_path = os.path.join(working_dir, section)
cmd = [driver_path] + options
- oneutils._run(cmd)
+ oneutils.run(cmd)
--- /dev/null
+#!/usr/bin/env python
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onelib.utils as oneutils
+
+
+class Command:
+ # Incrementally builds the argument list for one driver invocation and runs it.
+ # Every add_* method returns self, so calls can be chained.
+ #
+ # driver: first element of the command list; also passed to oneutils.run
+ # as err_prefix
+ # args: object whose attributes supply option values (read with getattr)
+ # log_file: object the command line is written to, as encoded bytes, before
+ # the command is run (presumably a file opened in binary mode -
+ # confirm against callers)
+ def __init__(self, driver, args, log_file):
+ self.cmd = [driver]
+ self.driver = driver
+ self.args = args
+ self.log_file = log_file
+
+ # Add option if attrs are valid
+ # Option values are collected from self.args
+ #
+ # `option` is appended, followed by the value of every attribute named in
+ # `attrs` (in the given order), only when oneutils.is_valid_attr accepts all
+ # of them. If any attribute is rejected, the command is left unchanged.
+ def add_option_with_valid_args(self, option, attrs):
+ for attr in attrs:
+ if not oneutils.is_valid_attr(self.args, attr):
+ return self
+ self.cmd.append(option)
+ for attr in attrs:
+ self.cmd.append(getattr(self.args, attr))
+ return self
+
+ # Add option and values without any condition
+ # `values` is iterated and each element is appended after `option` as-is.
+ def add_option_with_values(self, option, values):
+ self.cmd.append(option)
+ for value in values:
+ self.cmd.append(value)
+ return self
+
+ # Add option with no argument (ex: --verbose) if attr is valid
+ # Only `option` itself is appended; the attribute's value is not used.
+ def add_noarg_option_if_valid_arg(self, option, attr):
+ if oneutils.is_valid_attr(self.args, attr):
+ self.cmd.append(option)
+ return self
+
+ # Run cmd and save logs
+ # The space-joined command plus a newline is written to log_file as bytes
+ # first, then oneutils.run executes the command with the same log_file.
+ def run(self):
+ self.log_file.write((' '.join(self.cmd) + '\n').encode())
+ oneutils.run(self.cmd, err_prefix=self.driver, logfile=self.log_file)
from onelib.OptionBuilder import OptionBuilder
from onelib.TopologicalSortHelper import TopologicalSortHelper
from onelib.CfgRunner import CfgRunner
-import utils as oneutils
+import onelib.utils as oneutils
class WorkflowRunner:
# get the absolute path of the caller
driver_path = os.path.join(working_dir, driver_name)
cmd = [driver_path] + options
- oneutils._run(cmd)
+ oneutils.run(cmd)
elif self.CFG_REFERENCE_K in workflow:
cfg_path = workflow[self.CFG_REFERENCE_K]['path']
runner = CfgRunner(cfg_path)
class CONSTANT:
__slots__ = () # This prevents access via __dict__.
+
+ # Basic optimization passes
+ # These passes do not change the execution result of the model
+ O1 = (
+ # Constant folding
+ 'fold_add_v2',
+ 'fold_cast',
+ 'fold_densify',
+ 'fold_dequantize',
+ 'fold_dwconv',
+ 'fold_fully_connected',
+ 'fold_gather',
+ 'fold_sparse_to_dense',
+
+ # Operator fusion
+ 'fuse_add_with_tconv',
+ 'fuse_add_with_fully_connected',
+ 'fuse_batchnorm_with_conv',
+ 'fuse_batchnorm_with_dwconv',
+ 'fuse_batchnorm_with_tconv',
+ 'fuse_activation_function',
+ 'fuse_instnorm',
+ 'fuse_prelu',
+ 'fuse_mean_with_mean',
+ 'fuse_transpose_with_mean',
+ 'transform_min_max_to_relu6',
+ 'transform_min_relu_to_relu6',
+
+ # Remove redundant operators
+ 'remove_redundant_reshape',
+ 'remove_redundant_transpose',
+ 'remove_unnecessary_reshape',
+ 'remove_unnecessary_slice',
+ 'remove_unnecessary_strided_slice',
+ 'remove_unnecessary_split',
+
+ # Canonicalization
+ # (passes to help further optimization)
+ 'resolve_customop_add',
+ 'resolve_customop_batchmatmul',
+ 'resolve_customop_matmul',
+ 'resolve_customop_max_pool_with_argmax',
+ 'resolve_customop_splitv',
+ 'substitute_pack_to_reshape',
+ 'substitute_padv2_to_pad',
+ 'substitute_splitv_to_split',
+ 'substitute_squeeze_to_reshape',
+ 'substitute_strided_slice_to_reshape',
+ 'substitute_transpose_to_reshape',
+ 'forward_reshape_to_unaryop',
+ 'forward_transpose_op',
+ 'replace_non_const_fc_with_batch_matmul', # For quantization
+ )
+
OPTIMIZATION_OPTS = (
# (OPTION_NAME, HELP_MESSAGE)
('convert_nchw_to_nhwc',
('fold_densify', 'fold Densify op with sparse constant input'),
('fold_dequantize', 'fold Dequantize op'),
('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
+ ('fold_fully_connected', 'fold FullyConnected op with constant inputs'),
('fold_gather', 'fold Gather op'),
('fold_sparse_to_dense', 'fold SparseToDense op'),
('forward_reshape_to_unaryop', 'Forward Reshape op'),
+ ('forward_transpose_op', 'Forward Transpose op'),
('fuse_add_with_tconv', 'fuse Add op to Transposed'),
('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
' So, use it only when the impact is known to be acceptable.'),
('fuse_activation_function', 'fuse Activation function to a preceding operator'),
('fuse_instnorm', 'fuse ops to InstanceNorm operator'),
+ ('fuse_prelu', 'fuse ops to PReLU operator'),
('replace_cw_mul_add_with_depthwise_conv',
'replace channel-wise Mul/Add with DepthwiseConv2D'),
('remove_fakequant', 'remove FakeQuant ops'),
('substitute_transpose_to_reshape',
'convert certain condition Transpose to Reshape'),
('transform_min_max_to_relu6', 'transform Minimum-Maximum pattern to Relu6 op'),
- ('transform_min_relu_to_relu6', 'transform Minimum(6)-Relu pattern to Relu6 op'))
+ ('transform_min_relu_to_relu6', 'transform Minimum(6)-Relu pattern to Relu6 op'),
+ ('unroll_unidirseqlstm', 'unroll UnidirectionalSequenceLSTM op'))
CONSTANT = CONSTANT()
--- /dev/null
+#!/usr/bin/env python
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from constant import CONSTANT
+
+import argparse
+import configparser
+
+
+def main():
+    """Export one CONSTANT entry (or everything but it) to a cfg or txt file.
+
+    With `--exclusive`, the names written are the option names from
+    CONSTANT.OPTIMIZATION_OPTS that are not contained in the given constant;
+    otherwise the given constant itself is written.
+    """
+    parser = argparse.ArgumentParser(
+        description='Export CONSTANT value with given file format.')
+    parser.add_argument(
+        '-c', '--constant', type=str, required=True, help='Constant name to export')
+    parser.add_argument(
+        '-f',
+        '--format',
+        type=str,
+        required=True,
+        choices=['cfg', 'txt'],
+        help=
+        'File format to export. The created cfg file contains CONSTANT under the one-optimize section.'
+    )
+    parser.add_argument(
+        '--exclusive',
+        action='store_true',
+        help='Exports the rest of the options except for the given constant')
+    parser.add_argument(
+        '-o', '--output_path', type=str, required=True, help='Path to output')
+
+    args = parser.parse_args()
+
+    if not hasattr(CONSTANT, args.constant):
+        raise NameError('Not found given constant name')
+
+    selected = getattr(CONSTANT, args.constant)
+    if args.exclusive:
+        # keep only the option names that are NOT part of the given constant
+        names = [
+            entry[0] for entry in CONSTANT.OPTIMIZATION_OPTS if entry[0] not in selected
+        ]
+    else:
+        names = selected
+
+    if args.format == 'cfg':
+        section_name = 'one-optimize'
+        config = configparser.ConfigParser()
+        config[section_name] = dict()
+        section = config[section_name]
+        for name in names:
+            section[name] = 'True'
+
+        with open(args.output_path, 'w') as out:
+            config.write(out)
+
+    if args.format == 'txt':
+        with open(args.output_path, 'w') as out:
+            for name in names:
+                out.write(f"{name}\n")
+
+
+if __name__ == '__main__':
+ main()
import onelib.constant as _constant
-def _is_valid_attr(args, attr):
+def is_valid_attr(args, attr):
return hasattr(args, attr) and getattr(args, attr)
"""make a command for running tf2tfliteV2.py"""
cmd = [sys.executable, os.path.expanduser(driver_path)]
# verbose
- if _is_valid_attr(args, 'verbose'):
+ if is_valid_attr(args, 'verbose'):
cmd.append('--verbose')
# model_format
- if _is_valid_attr(args, 'model_format_cmd'):
+ if is_valid_attr(args, 'model_format_cmd'):
cmd.append(getattr(args, 'model_format_cmd'))
- elif _is_valid_attr(args, 'model_format'):
+ elif is_valid_attr(args, 'model_format'):
cmd.append('--' + getattr(args, 'model_format'))
else:
cmd.append('--graph_def') # default value
# converter version
- if _is_valid_attr(args, 'converter_version_cmd'):
+ if is_valid_attr(args, 'converter_version_cmd'):
cmd.append(getattr(args, 'converter_version_cmd'))
- elif _is_valid_attr(args, 'converter_version'):
+ elif is_valid_attr(args, 'converter_version'):
cmd.append('--' + getattr(args, 'converter_version'))
else:
cmd.append('--v1') # default value
# input_path
- if _is_valid_attr(args, 'input_path'):
+ if is_valid_attr(args, 'input_path'):
cmd.append('--input_path')
cmd.append(os.path.expanduser(input_path))
# output_path
- if _is_valid_attr(args, 'output_path'):
+ if is_valid_attr(args, 'output_path'):
cmd.append('--output_path')
cmd.append(os.path.expanduser(output_path))
# input_arrays
- if _is_valid_attr(args, 'input_arrays'):
+ if is_valid_attr(args, 'input_arrays'):
cmd.append('--input_arrays')
cmd.append(getattr(args, 'input_arrays'))
# input_shapes
- if _is_valid_attr(args, 'input_shapes'):
+ if is_valid_attr(args, 'input_shapes'):
cmd.append('--input_shapes')
cmd.append(getattr(args, 'input_shapes'))
# output_arrays
- if _is_valid_attr(args, 'output_arrays'):
+ if is_valid_attr(args, 'output_arrays'):
cmd.append('--output_arrays')
cmd.append(getattr(args, 'output_arrays'))
# experimental options
- if _is_valid_attr(args, 'experimental_disable_batchmatmul_unfold'):
+ if is_valid_attr(args, 'experimental_disable_batchmatmul_unfold'):
cmd.append('--experimental_disable_batchmatmul_unfold')
return cmd
"""make a command for running circle2circle"""
cmd = [os.path.expanduser(c) for c in [driver_path, input_path, output_path]]
# profiling
- if _is_valid_attr(args, 'generate_profile_data'):
+ if is_valid_attr(args, 'generate_profile_data'):
cmd.append('--generate_profile_data')
# optimization pass(only true/false options)
# TODO support options whose number of arguments is more than zero
for opt in _constant.CONSTANT.OPTIMIZATION_OPTS:
- if _is_valid_attr(args, opt[0]):
+ if is_valid_attr(args, opt[0]):
# ./driver --opt[0]
if type(getattr(args, opt[0])) is bool:
cmd.append('--' + opt[0])
--- /dev/null
+#!/usr/bin/env python
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import configparser
+import glob
+import importlib
+import ntpath
+import os
+import subprocess
+import sys
+
+from typing import Union
+
+import onelib.constant as _constant
+
+
+def add_default_arg(parser):
+    """Register the options shared by the drivers on `parser`.
+
+    Adds `-v/--version` and `-V/--verbose` (both boolean flags),
+    `-C/--config` (path of a configuration file) and `-S/--section`
+    (section of the configuration file to run; hidden from the help output).
+
+    :param parser: argparse parser (or any object with `add_argument`)
+    """
+    # version
+    parser.add_argument(
+        '-v',
+        '--version',
+        action='store_true',
+        help='show program\'s version number and exit')
+
+    # verbose
+    parser.add_argument(
+        '-V',
+        '--verbose',
+        action='store_true',
+        help='output additional information to stdout or stderr')
+
+    # configuration file
+    parser.add_argument('-C', '--config', type=str, help='run with configuration file')
+    # section name that you want to run in configuration file
+    parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS)
+
+
+def add_default_arg_no_CS(parser):
+    """
+    Register only the -v/--version and -V/--verbose flags on `parser`
+    (neither -C nor -S is added).
+    """
+    # (short flag, long flag, help text); both are plain boolean switches
+    flag_specs = (
+        ('-v', '--version', "show program's version number and exit"),
+        ('-V', '--verbose', 'output additional information to stdout or stderr'),
+    )
+    for short_flag, long_flag, description in flag_specs:
+        parser.add_argument(
+            short_flag, long_flag, action='store_true', help=description)
+
+
+def is_accumulated_arg(arg, driver):
+    """Tell whether `arg` is an option of `driver` whose values are accumulated.
+
+    Only "one-quantize" has such options.
+    """
+    if driver != "one-quantize":
+        return False
+    return arg in ("tensor_name", "scale", "zero_point", "src_tensor_name",
+                   "dst_tensor_name")
+
+
+def is_valid_attr(args, attr):
+    """Return the value of `args.<attr>` when it exists, otherwise False.
+
+    The result is meant to be used as a truth value: a missing attribute
+    and a falsy one (None, '', False, ...) both count as "not valid".
+    """
+    if not hasattr(args, attr):
+        return False
+    return getattr(args, attr)
+
+
+def parse_cfg_and_overwrite(config_path, section, args):
+    """
+    Read `section` of the configuration file and copy every option onto args.
+    Options that already exist in args are overwritten.
+    Nothing happens when `config_path` is None.
+
+    Raises FileNotFoundError when the file cannot be read and
+    AssertionError when the file has no such section.
+    """
+    if config_path is None:
+        return
+
+    cfg = configparser.ConfigParser()
+    # keep option names as written (the default would lower-case them)
+    cfg.optionxform = str
+    if not cfg.read(config_path):
+        raise FileNotFoundError('Not found given configuration file')
+    if not cfg.has_section(section):
+        raise AssertionError("configuration file doesn't have '" + section +
+                             "' section")
+
+    options = cfg[section]
+    for name in options:
+        setattr(args, name, options[name])
+    # TODO support accumulated arguments
+
+
+def parse_cfg(config_path: Union[str, None], section_to_parse: str, args):
+    """
+    Read one section of a configuration file into args without overriding
+    values that args already holds.
+
+    Accumulated options (see `is_accumulated_arg`) are collected in a list:
+    the value is appended when args already has a valid list for the key,
+    otherwise a new one-element list is stored.
+
+    :param config_path: path to configuration file (None means "do nothing")
+    :param section_to_parse: section name to parse
+    :param args: object to store the parsed information
+    :raises AssertionError: when the section is not present
+    """
+    if config_path is None:
+        return
+
+    cfg = configparser.ConfigParser()
+    # keep option names case sensitive
+    cfg.optionxform = str
+    cfg.read(config_path)
+
+    if not cfg.has_section(section_to_parse):
+        raise AssertionError("configuration file must have '" + section_to_parse +
+                             "' section")
+
+    options = cfg[section_to_parse]
+    for name in options:
+        if is_accumulated_arg(name, section_to_parse):
+            if is_valid_attr(args, name):
+                getattr(args, name).append(options[name])
+            else:
+                setattr(args, name, [options[name]])
+        elif not (hasattr(args, name) and getattr(args, name)):
+            # only fill in what the caller has not set already
+            setattr(args, name, options[name])
+
+
+def print_version_and_exit(file_path):
+    """Run `one-version` for the script at `file_path`, then exit.
+
+    `one-version` is looked up in the directory of the resolved script and
+    is given the script's file name without its extension.
+    """
+    tool_dir, tool_file = os.path.split(os.path.realpath(file_path))
+    tool_name, _ = os.path.splitext(tool_file)
+    # run one-version
+    subprocess.call([os.path.join(tool_dir, 'one-version'), tool_name])
+    sys.exit()
+
+
+def safemain(main, mainpath):
+    """Call `main()`; on any uncaught Exception report it and exit with 255.
+
+    The report goes to stderr as "<program name>: <exception type>: <message>",
+    where the program name is the base name of `mainpath`.
+    """
+    try:
+        main()
+    except Exception as err:
+        program = os.path.basename(mainpath)
+        kind = type(err).__name__
+        print(f"{program}: {kind}: {err}", file=sys.stderr)
+        sys.exit(255)
+
+
+def run(cmd, err_prefix=None, logfile=None):
+    """Run `cmd` in a subprocess and relay its output line by line.
+
+    Lines from the child's stdout/stderr are forwarded to this process'
+    stdout/stderr as they arrive. When the child ends with a non-zero
+    return code, this process exits with the same code.
+
+    Args:
+        cmd: command to be executed in subprocess
+        err_prefix: prefix put before every stderr line
+        logfile: stream that receives both stdout and stderr lines (as bytes)
+    """
+    import select
+
+    with subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as child:
+        pending = {child.stdout, child.stderr}
+        while pending:
+            ready, _, _ = select.select(pending, [], [])
+            for stream in ready:
+                chunk = stream.readline()
+                if not chunk:
+                    # EOF: stop watching this pipe
+                    pending.discard(stream)
+                    continue
+                if stream == child.stderr:
+                    sink = sys.stderr
+                    if err_prefix:
+                        chunk = f"{err_prefix}: ".encode() + chunk
+                else:
+                    sink = sys.stdout
+                sink.buffer.write(chunk)
+                sink.buffer.flush()
+                if logfile is not None:
+                    logfile.write(chunk)
+    # leaving the `with` block waits for the child, so returncode is set here
+    if child.returncode != 0:
+        sys.exit(child.returncode)
+
+
+def remove_prefix(str, prefix):
+    """Return `str` without the leading `prefix`; unchanged when it is absent."""
+    return str[len(prefix):] if str.startswith(prefix) else str
+
+
+def remove_suffix(str, suffix):
+    """Return `str` without the trailing `suffix`; unchanged when it is absent.
+
+    An empty `suffix` leaves `str` untouched.
+    """
+    # guard the empty suffix: str[:-0] is str[:0], which would drop everything
+    if suffix and str.endswith(suffix):
+        return str[:-len(suffix)]
+    return str
+
+
+def get_optimization_list(get_name=False):
+    """
+    returns the list of optimization option files. If `get_name` is true,
+    only the basename without extension is returned rather than the full path.
+
+    [one hierarchy]
+    one
+    ├── backends
+    ├── bin
+    ├── doc
+    ├── include
+    ├── lib
+    ├── optimization
+    └── test
+
+    Optimization options must be placed in `optimization` folder, as
+    readable regular files named `O*.cfg`. Files whose name contains a
+    space are skipped.
+    """
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+
+    # optimization folder
+    candidates = glob.glob(dir_path + '/../../optimization/O*.cfg')
+
+    opt_list = []
+    for cand in candidates:
+        # exclude if the name has space; only the file name is checked so
+        # that a space in the installation directory does not hide every option
+        if ' ' in ntpath.basename(cand):
+            continue
+        if os.path.isfile(cand) and os.access(cand, os.R_OK):
+            opt_list.append(cand)
+
+    if get_name:
+        # NOTE the name includes prefix 'O'
+        # e.g. O1, O2, ONCHW not just 1, 2, NCHW
+        opt_list = [remove_suffix(ntpath.basename(f), '.cfg') for f in opt_list]
+
+    return opt_list
+
+
+def detect_one_import_drivers(search_path):
+    """Looks for import drivers in given directory
+
+    A driver is a regular file whose name starts with "one-import-" and
+    which, loaded as Python source, defines `get_driver_cfg_section()`.
+    Files that fail to load are skipped.
+
+    Args:
+        search_path: path to the directory where to search import drivers
+
+    Returns:
+        dict: each entry is related to single detected driver,
+              key is a config section name, value is a driver name
+
+    """
+    # `import importlib` alone does not guarantee that these submodules are
+    # loaded, so import them explicitly before use
+    import importlib.machinery
+    import importlib.util
+
+    import_drivers_dict = {}
+    for module_name in os.listdir(search_path):
+        full_path = os.path.join(search_path, module_name)
+        if not os.path.isfile(full_path):
+            continue
+        if not module_name.startswith("one-import-"):
+            continue
+        module_loader = importlib.machinery.SourceFileLoader(module_name, full_path)
+        module_spec = importlib.util.spec_from_loader(module_name, module_loader)
+        module = importlib.util.module_from_spec(module_spec)
+        try:
+            module_loader.exec_module(module)
+            if hasattr(module, "get_driver_cfg_section"):
+                section = module.get_driver_cfg_section()
+                import_drivers_dict[section] = module_name
+        except (Exception, SystemExit):
+            # best effort: a candidate that cannot be loaded (or exits while
+            # loading) is not a usable driver; Ctrl-C is no longer swallowed
+            continue
+    return import_drivers_dict
if(ENABLE_ONE_IMPORT_PYTORCH)
add_subdirectory(pytorch-operations)
endif(ENABLE_ONE_IMPORT_PYTORCH)
+
+# Generate group option list for tests
+#
+# Two text files are produced in the build directory and installed to `test`:
+#   O1.list     : options exported from the O1 constant
+#   non-O1.list : exported with --exclusive (the remaining options)
+get_filename_component(ONE_CMDS_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
+set(ONE_PYTHON_DIR "onelib")
+set(CONSTANT_EXPORTING_SCRIPT "${ONE_CMDS_DIR}/${ONE_PYTHON_DIR}/export_constant.py")
+# The exporting script imports its data from constant.py, so the generated
+# lists must be rebuilt when that file changes as well
+set(CONSTANT_DEFINITION_SCRIPT "${ONE_CMDS_DIR}/${ONE_PYTHON_DIR}/constant.py")
+set(O1_OPTION "O1")
+set(O1_TXT_FILE "${O1_OPTION}.list")
+set(O1_TXT_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${O1_TXT_FILE}")
+set(NON_O1_TXT_FILE "non-${O1_OPTION}.list")
+set(NON_O1_TXT_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${NON_O1_TXT_FILE}")
+
+add_custom_command(OUTPUT ${O1_TXT_FILE_BIN}
+  COMMAND ${PYTHON_EXECUTABLE} ${CONSTANT_EXPORTING_SCRIPT} --constant ${O1_OPTION}
+          --format txt --output_path ${O1_TXT_FILE_BIN}
+  DEPENDS ${CONSTANT_EXPORTING_SCRIPT} ${CONSTANT_DEFINITION_SCRIPT}
+  COMMENT "Generate ${O1_TXT_FILE}"
+)
+
+add_custom_command(OUTPUT ${NON_O1_TXT_FILE_BIN}
+  COMMAND ${PYTHON_EXECUTABLE} ${CONSTANT_EXPORTING_SCRIPT} --constant ${O1_OPTION}
+          --format txt --output_path ${NON_O1_TXT_FILE_BIN}
+          --exclusive
+  DEPENDS ${CONSTANT_EXPORTING_SCRIPT} ${CONSTANT_DEFINITION_SCRIPT}
+  COMMENT "Generate ${NON_O1_TXT_FILE}"
+)
+
+add_custom_target("O1_txt_target" ALL DEPENDS ${O1_TXT_FILE_BIN} ${NON_O1_TXT_FILE_BIN})
+
+install(FILES ${O1_TXT_FILE_BIN}
+        PERMISSIONS OWNER_WRITE OWNER_READ
+                    GROUP_READ
+                    WORLD_READ
+        DESTINATION test)
+
+install(FILES ${NON_O1_TXT_FILE_BIN}
+        PERMISSIONS OWNER_WRITE OWNER_READ
+                    GROUP_READ
+                    WORLD_READ
+        DESTINATION test)
configfile="one-build_001.cfg"
outputfile="inception_v3.opt.circle"
+rm -f ${filename}.log
+rm -f ${outputfile}
+
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-build_002.cfg"
outputfile="inception_v3_pkg"
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-build_003.cfg"
outputfile="inception_v3.quantized.circle"
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-build_004.cfg"
outputfile="sample.tvn"
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-build_005.cfg"
outputfile="sample.tvn"
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-build_006.cfg"
outputfile="sample.tvn"
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-build_007.cfg"
outputfile="inception_v3_pkg"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-build_008.cfg"
outputfile="test_onnx_model.bin"
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-build_009.cfg"
outputfile="onnx_conv2d_conv2d.bin"
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
outputfile="inception_v3.alt.circle"
intermfile="inception_v3.alt.tflite"
-rm -rf ${outputfile}
-rm -rf ${intermfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
+rm -f ${intermfile}
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
outputfile="test_onnx_model.circle"
intermfile="test_onnx_model.tflite"
-rm -rf ${outputfile}
-rm -rf ${intermfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
+rm -f ${intermfile}
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-build_012.cfg"
outputfile="inception_v3.list.quantized.circle"
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-build_013.cfg"
outputfile="inception_v3.dir.quantized.circle"
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
# run test
-one-build -C ${configfile} > /dev/null 2>&1
+one-build -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-build_014.cfg"
outputfile="inception_v3.opt.circle"
-rm -rf ${outputfile}
+rm -f ${filename}.log
+rm -f ${outputfile}
if [ ! -d "../optimization" ]; then
mkdir -p ../optimization
clean_envir
-if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
+if ! grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
echo "${filename_ext} SUCCESS"
exit 0
fi
configfile="one-build_neg_002.cfg"
+rm -f ${filename}.log
+
# run test
one-build -C ${configfile} > ${filename}.log 2>&1
configfile="one-build_neg_003.cfg"
+rm -f ${filename}.log
+
# run test
one-build -C ${configfile} > ${filename}.log 2>&1
configfile="one-build_neg_004.cfg"
+rm -f ${filename}.log
+
# run test
one-build -C ${configfile} > ${filename}.log 2>&1
rm -rf ${outputfile}
rm -rf ${intermfile}
+rm -f ${filename}.log
# run test
one-build -C ${configfile} > ${filename}.log 2>&1
rm -rf ${outputfile}
rm -rf ${intermfile}
+rm -f ${filename}.log
# run test
one-build -C ${configfile} > ${filename}.log 2>&1
configfile=".."
+rm -f ${filename}.log
+
# run test
one-build -C ${configfile} -OONE_BUILD_NEG_007 > ${filename}.log 2>&1
configfile=".."
+rm -f ${filename}.log
+
# run test
one-build -C ${configfile} -OONE_BUILD_NEG_008 > ${filename}.log 2>&1
configfile=".."
+rm -f ${filename}.log
+
# run test
one-build -C ${configfile} "-O SPACE OPTION" > ${filename}.log 2>&1
trap trap_err_onexit ERR
+rm -f ${filename}.log
+
# copy help-compile to bin folder
cp help-compile ../bin/help-compile
outputfile="sample.tvn"
rm -rf ${outputfile}
+rm -f ${filename}.log
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
outputfile="sample.tvn"
rm -rf ${outputfile}
+rm -f ${filename}.log
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
trap trap_err_onexit ERR
+rm -f ${filename}.log
+
# run test
one-codegen -h > ${filename}.log 2>&1
trap trap_err_onexit ERR
+rm -f ${filename}.log
+
# run test
one-codegen > ${filename}.log 2>&1
inputfile="./bcq.pb"
outputfile="./bcq.circle"
+rm -f ${filename}.log
rm -rf $outputfile
# run test
--input_path ${inputfile} \
--output_path ${outputfile} \
--input_arrays Placeholder \
---output_arrays MatMul > /dev/null 2>&1
+--output_arrays MatMul > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
outputfile="./bcq.circle"
rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-import-bcq \
outputfile="./bcq.circle"
rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-import-bcq \
outputfile="./bcq.circle"
rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-import-bcq \
outputfile="./bcq.circle"
rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-import-bcq \
inputfile="./bcq.pb"
outputfile="."
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-import-bcq \
inputfile="./bcq.pb"
outputfile="./bcq.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-import-bcq \
inputfile="./bcq.pb"
outputfile="./bcq.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-import-bcq \
inputfile="./bcq.pb"
outputfile="./bcq.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-import-bcq \
inputfile="./bcq.pb"
outputfile="./bcq.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-import-bcq \
outputfile="./test_onnx_model.circle"
rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-import-onnx \
--input_path ${inputfile} \
---output_path ${outputfile} > ${outputfile}.log 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
trap trap_err_onexit ERR
inputfile="./reshape_matmul.onnx"
-outputfile="./reshape_matmul.circle"
+outputfile="./reshape_matmul.one-import-onnx_002.circle"
rm -rf ${outputfile}
rm -rf ${outputfile}.log
trap_err_onexit
fi
-circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1
+circle-operator --code ${outputfile} > ${outputfile}.log 2>&1
if ! grep -q "FULLY_CONNECTED" "${outputfile}.log"; then
trap_err_onexit
trap_err_onexit
fi
-circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1
+circle-operator --code ${outputfile} > ${outputfile}.log 2>&1
if ! grep -q "BATCH_MATMUL" "${outputfile}.log"; then
trap_err_onexit
outputfile="./inception_v3.circle"
# Note: Do not remove output circle file as it's used for quantize tests
+rm -f ${filename}.log
# run test
one-import tf \
--input_path ${inputfile} \
--output_path ${outputfile} \
--input_arrays input --input_shapes "1,299,299,3" \
---output_arrays InceptionV3/Predictions/Reshape_1 > /dev/null 2>&1
+--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3_cfg.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
trap trap_err_onexit ERR
configfile="one-import_002.cfg"
-outputfile="inception_v3_cmd.circle"
+outputfile_cmd="inception_v3_cmd.circle"
+outputfile_cfg="inception_v3_cfg.circle"
+
+rm -f ${filename}.log
+rm -f ${outputfile_cmd}
+rm -f ${outputfile_cfg}
# run test
one-import tf -C ${configfile} \
---output_path=inception_v3_cmd.circle > /dev/null 2>&1
+--output_path=${outputfile_cmd} > ${filename}.log 2>&1
-if [[ ! -s "${outputfile}" ]]; then
+if [[ ! -s "${outputfile_cmd}" ]]; then
trap_err_onexit
fi
configfile="one-import_003.cfg"
outputfile="test_saved_model.circle"
+rm -f ${filename}.log
rm -f ${outputfile}
# run test
-one-import tf -C ${configfile} > /dev/null 2>&1
+one-import tf -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="one-import_004.cfg"
outputfile="test_keras_model.circle"
+rm -f ${filename}.log
rm -f ${outputfile}
# run test
-one-import tf -C ${configfile} > /dev/null 2>&1
+one-import tf -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
outputfile="test_onnx_model.circle"
rm -f ${outputfile}
+rm -f ${filename}.log
# run test
one-import onnx -C ${configfile} > ${filename}.log 2>&1
outputfile="test_onnx_model.circle"
rm -f ${outputfile}
+rm -f ${filename}.log
# run test
one-import onnx -i ${inputfile} -o ${outputfile} > ${filename}.log 2>&1
inputfile="./inception_v3.tflite"
outputfile="./inception_v3.circle"
-rm -rf ${outputfile}
+# do not remove output file
+# rm -rf ${outputfile}
rm -rf ${filename}.log
# run test
inputfile="./inception_v3.pb"
outputfile="./inception_v3.circle"
-rm -rf ${outputfile}
+# do not remove output file
+# rm -rf ${outputfile}
rm -rf ${filename}.log
# run test
inputfile="./inception_v3.pb"
outputfile="./inception_v3.circle"
-rm -rf ${outputfile}
+# do not remove output file
+# rm -rf ${outputfile}
rm -rf ${filename}.log
# run test
trap trap_err_onexit ERR
inputfile="./inception_v3.pb"
-outputfile="./inception_v3.circle"
+outputfile="./inception_v3.imp_neg_005.circle"
rm -rf ${outputfile}
rm -rf ${filename}.log
trap trap_err_onexit ERR
inputfile="./inception_v3.pb"
-outputfile="./inception_v3.circle"
+outputfile="./inception_v3.imp_neg_006.circle"
rm -rf ${outputfile}
rm -rf ${filename}.log
trap trap_err_onexit ERR
inputfile="./inception_v3.pb"
-outputfile="./inception_v3.circle"
+outputfile="./inception_v3.imp_neg_007.circle"
rm -rf ${outputfile}
rm -rf ${filename}.log
trap trap_err_onexit ERR
inputfile="./inception_v3.pb"
-outputfile="./inception_v3.circle"
+outputfile="./inception_v3.imp_neg_008.circle"
rm -rf ${outputfile}
rm -rf ${filename}.log
trap trap_err_onexit ERR
inputfile="./inception_v3.pb"
-outputfile="./inception_v3.circle"
+outputfile="./inception_v3.imp_neg_010.circle"
rm -rf ${outputfile}
rm -rf ${filename}.log
# copy help-infer to bin folder
cp help-infer ../bin/help-infer
+rm -f ${filename}.log
+
# run test
-one-infer -b help -- -h > ${filename}.log
+one-infer -d help-infer -- -h > ${filename}.log
rm -rf ../bin/help-infer
# copy dummy-infer to bin folder
cp dummy-infer ../bin/dummy-infer
+rm -f ${filename}.log
+
# run test
one-infer -d dummy-infer -- ${inputfile} > ${filename}.log
# See the License for the specific language governing permissions and
# limitations under the License.
+# print one-infer's help message
+
filename_ext="$(basename -- $0)"
filename="${filename_ext%.*}"
trap_err_onexit()
{
echo "${filename_ext} FAILED"
- rm -rf ../bin/dummy-infer
exit 255
}
trap trap_err_onexit ERR
-inputfile="sample.tvn"
-
-if [[ ! -s "${inputfile}" ]]; then
- touch ${inputfile}
-fi
-
-# copy dummy-infer to bin folder
-cp dummy-infer ../bin/dummy-infer
+rm -f ${filename}.log
# run test
-one-infer -b dummy -- ${inputfile} > ${filename}.log
-
-rm -rf ../bin/dummy-infer
+one-infer -h > ${filename}.log
-if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+if grep -q "command line tool to infer model" "${filename}.log"; then
echo "${filename_ext} SUCCESS"
exit 0
fi
--- /dev/null
+[one-infer]
+driver=dummy-infer
+command=sample.tvn
# See the License for the specific language governing permissions and
# limitations under the License.
-# print one-infer's help message
+# one-infer with configuration input
filename_ext="$(basename -- $0)"
filename="${filename_ext%.*}"
trap_err_onexit()
{
echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
exit 255
}
trap trap_err_onexit ERR
+configfile="one-infer_004.cfg"
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+rm -f ${filename}.log
+
# run test
-one-infer -h > ${filename}.log
+one-infer -C ${configfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
-if grep -q "command line tool to infer model" "${filename}.log"; then
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
echo "${filename_ext} SUCCESS"
exit 0
fi
+++ /dev/null
-[one-infer]
-backend=dummy
-command=sample.tvn
# See the License for the specific language governing permissions and
# limitations under the License.
-# one-infer with configuration input
+# one-infer with post process script
filename_ext="$(basename -- $0)"
filename="${filename_ext%.*}"
trap trap_err_onexit ERR
-configfile="one-infer_005.cfg"
inputfile="sample.tvn"
if [[ ! -s "${inputfile}" ]]; then
# copy dummy-infer to bin folder
cp dummy-infer ../bin/dummy-infer
+rm -f ${filename}.log
+
# run test
-one-infer -C ${configfile} > ${filename}.log
+one-infer -d dummy-infer --post-process "./one-infer-test-post-process.py TOKEN" -- ${inputfile} > ${filename}.log 2>&1
+return_code=$?
rm -rf ../bin/dummy-infer
if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
- echo "${filename_ext} SUCCESS"
- exit 0
+ if [ "$return_code" -eq "0" ]; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
fi
trap_err_onexit
+++ /dev/null
-#!/bin/bash
-
-# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# one-infer with post process script
-
-filename_ext="$(basename -- $0)"
-filename="${filename_ext%.*}"
-
-trap_err_onexit()
-{
- echo "${filename_ext} FAILED"
- rm -rf ../bin/dummy-infer
- exit 255
-}
-
-trap trap_err_onexit ERR
-
-inputfile="sample.tvn"
-
-if [[ ! -s "${inputfile}" ]]; then
- touch ${inputfile}
-fi
-
-# copy dummy-infer to bin folder
-cp dummy-infer ../bin/dummy-infer
-
-# run test
-one-infer -b dummy --post-process "./one-infer-test-post-process.py TOKEN" -- ${inputfile} > ${filename}.log 2>&1
-return_code=$?
-
-rm -rf ../bin/dummy-infer
-
-if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
- if [ "$return_code" -eq "0" ]; then
- echo "${filename_ext} SUCCESS"
- exit 0
- fi
-fi
-
-trap_err_onexit
trap_err_onexit()
{
- if grep -q "error: the following arguments are required: {-d/--driver | -b/--backend}" "${filename}.log"; then
+ if grep -q "error: the following arguments are required: -d/--driver" "${filename}.log"; then
echo "${filename_ext} SUCCESS"
exit 0
fi
trap trap_err_onexit ERR
+rm -f ${filename}.log
+
# run test
one-infer > ${filename}.log 2>&1
trap trap_err_onexit ERR
+rm -f ${filename}.log
+
# run test
-one-infer -d ${driver_name} -- -h> ${filename}.log 2>&1
+one-infer -d ${driver_name} -- -h > ${filename}.log 2>&1
echo "${filename_ext} FAILED"
exit 255
# See the License for the specific language governing permissions and
# limitations under the License.
-# passed backend is not found
+# one-infer with invalid post process script
filename_ext="$(basename -- $0)"
filename="${filename_ext%.*}"
-backend_name="neg"
trap_err_onexit()
{
- if grep -q "FileNotFoundError: ${backend_name}-infer not found" "${filename}.log"; then
- echo "${filename_ext} SUCCESS"
- exit 0
+  # Capture the status of the failing command first; any later command overwrites $?
+  return_code=$?
+  # Remove the copied driver on every exit path. The ERR trap fires as soon as
+  # one-infer fails, so the cleanup after the "run test" step is never reached.
+  rm -rf ../bin/dummy-infer
+  if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+    # Case of succeed of inference driver but error after it
+    if [ "$return_code" -ne "0" ]; then
+      echo "${filename_ext} SUCCESS"
+      exit 0
+    fi
fi
echo "${filename_ext} FAILED"
exit 255
}
trap trap_err_onexit ERR
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+rm -f ${filename}.log
+
# run test
-one-infer -b ${backend_name} -- -h> ${filename}.log 2>&1
+one-infer -d dummy-infer --post-process "./one-infer-test-post-process.py" -- ${inputfile} > ${filename}.log 2>&1
+rm -rf ../bin/dummy-infer
echo "${filename_ext} FAILED"
exit 255
+++ /dev/null
-#!/bin/bash
-
-# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# both -b and -d option drivers are given as argument
-
-filename_ext="$(basename -- $0)"
-filename="${filename_ext%.*}"
-backend_name="neg"
-driver_name="neg2"
-
-trap_err_onexit()
-{
- if grep -q "\-d and -b options are mutually exclusive. Please use only one of them" "${filename}.log"; then
- echo "${filename_ext} SUCCESS"
- exit 0
- fi
-
- echo "${filename_ext} FAILED"
- exit 255
-}
-
-trap trap_err_onexit ERR
-
-# run test
-one-infer -d ${driver_name} -b ${backend_name} -- -h> ${filename}.log 2>&1
-
-echo "${filename_ext} FAILED"
-exit 255
+++ /dev/null
-#!/bin/bash
-
-# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# one-infer with invalid post process script
-
-filename_ext="$(basename -- $0)"
-filename="${filename_ext%.*}"
-
-trap_err_onexit()
-{
- return_code=$?
- if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
- # Case of succeed of inference driver but error after it
- if [ "$return_code" -ne "0" ]; then
- echo "${filename_ext} SUCCESS"
- exit 0
- fi
- fi
-
- echo "${filename_ext} FAILED"
- rm -rf ../bin/dummy-infer
- exit 255
-}
-
-trap trap_err_onexit ERR
-
-inputfile="sample.tvn"
-
-if [[ ! -s "${inputfile}" ]]; then
- touch ${inputfile}
-fi
-
-# copy dummy-infer to bin folder
-cp dummy-infer ../bin/dummy-infer
-
-# run test
-one-infer -b dummy --post-process "./one-infer-test-post-process.py" -- ${inputfile} > ${filename}.log 2>&1
-
-rm -rf ../bin/dummy-infer
-echo "${filename_ext} FAILED"
-exit 255
inputfile="./inception_v3.circle"
outputfile="./inception_v3-opt.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test > /dev/null 2>&1
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
-
# run test
one-optimize --resolve_customop_add \
--input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
inputfile="./inception_v3.circle"
outputfile="./inception_v3-opt.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test > /dev/null 2>&1
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
-
# run test
one-optimize --resolve_customop_add \
--change_outputs InceptionV3/Logits/SpatialSqueeze1 \
--input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-optimize with --unroll_unidirseqlstm: UNIDIRECTIONAL_SEQUENCE_LSTM must be
+# listed for the imported model and must not be listed for the optimized model
+
+filename_ext="$(basename -- "$0")"
+filename="${filename_ext%.*}"
+
+# Report failure; installed as the ERR trap and also called by the explicit checks
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./UnidirSeqLSTM.tflite"
+intermfile="./UnidirSeqLSTM.circle"
+outputfile="./UnidirSeqLSTM-opt.circle"
+
+# remove artifacts of a previous run so stale files cannot satisfy the checks
+rm -f "${filename}.log"
+rm -f "${intermfile}"
+rm -f "${outputfile}"
+rm -f "${intermfile}.log"
+rm -f "${outputfile}.log"
+
+# run test
+# tool output is kept in ${filename}.log instead of being discarded, so that a
+# failing run can be diagnosed
+one-import-tflite \
+--input_path "${inputfile}" \
+--output_path "${intermfile}" > "${filename}.log" 2>&1
+
+one-optimize --unroll_unidirseqlstm \
+--input_path "${intermfile}" \
+--output_path "${outputfile}" >> "${filename}.log" 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+# check UNIDIRECTIONAL_SEQUENCE_LSTM exist
+circle-operator --code "${intermfile}" > "${intermfile}.log" 2>&1
+if ! grep -q "UNIDIRECTIONAL_SEQUENCE_LSTM" "${intermfile}.log"; then
+  trap_err_onexit
+fi
+
+# check UNIDIRECTIONAL_SEQUENCE_LSTM absent
+circle-operator --code "${outputfile}" > "${outputfile}.log" 2>&1
+if grep -q "UNIDIRECTIONAL_SEQUENCE_LSTM" "${outputfile}.log"; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
outputfile="./inception_v3-opt.circle"
rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-optimize --resolve_customop_add \
outputfile="./inception_v3-opt.circle"
rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-optimize --resolve_customop_add \
inputfile="./inception_v3.circle"
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
+rm -f ${filename}.log
# run test
one-optimize --resolve_customop_add \
inputfile="./inception_v3.circle"
outputfolder="nnpack"
+rm -f ${filename}.log
rm -rf ${outputfolder}
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test > /dev/null 2>&1
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
-
# run test
one-pack \
-i ${inputfile} \
--o ${outputfolder} > /dev/null 2>&1
+-o ${outputfolder} > ${filename}.log 2>&1
if [[ ! -d "${outputfolder}" ]]; then
trap_err_onexit
partfile="${testmodel}.part"
outputfile="${testmodel}.conn.json"
+rm -f ${filename}.log
rm -rf ${testmodel}.000*
rm -rf ${testmodel}.conn.*
rm -rf ${testmodel}.*.log
# run test
one-partition \
--input_file ${inputfile} \
---part_file ${partfile} > /dev/null 2>&1
+--part_file ${partfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
echo "${filename_ext} SUCCESS"
exit 0
fi
+  # for debug build test: a std::runtime_error report in the log is also accepted
+  # as the expected failure. -q keeps grep from echoing matched lines (the former
+  # -1 option requested one line of context and printed the matches).
+  if grep -q "std::runtime_error" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
echo "${filename_ext} FAILED"
exit 255
# copy help-profile to bin folder
cp help-profile ../bin/help-profile
+rm -f ${filename}.log
+
# run test
one-profile -b help -- -h > ${filename}.log
# copy dummy-profile to bin folder
cp dummy-profile ../bin/dummy-profile
+rm -f ${filename}.log
+
# run test
one-profile -b dummy ${inputfile} > ${filename}.log
trap trap_err_onexit ERR
+rm -f ${filename}.log
+
# run test
one-profile -h > ${filename}.log
# copy dummy-profile to bin folder
cp dummy-profile ../bin/dummy-profile
+rm -f ${filename}.log
+
# run test
one-profile -C ${configfile} > ${filename}.log
trap trap_err_onexit ERR
+rm -f ${filename}.log
+
# run test
one-profile > ${filename}.log 2>&1
inputfile="./inception_v3.circle"
outputfile="./inception_v3.quantized.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test > /dev/null 2>&1
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
-
# run test
one-quantize \
--input_dtype float32 \
--quantized_dtype uint8 \
--input_path ./inception_v3.circle \
--input_data ./inception_v3_test_data.h5 \
---output_path ./inception_v3.quantized.circle > /dev/null 2>&1
+--output_path ./inception_v3.quantized.circle > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
inputfile="./inception_v3.circle"
outputfile="./inception_v3.random.quantized.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test > /dev/null 2>&1
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
-
# run test without input data
one-quantize \
--input_dtype float32 \
--quantized_dtype uint8 \
---input_path ./inception_v3.circle \
---output_path ./inception_v3.random.quantized.circle > /dev/null 2>&1
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
inputfile="./inception_v3.circle"
outputfile="./inception_v3.list.quantized.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test with list-format input data (datalist.txt)
one-quantize \
--input_dtype float32 \
--quantized_dtype uint8 \
---input_path ./inception_v3.circle \
+--input_path ${inputfile} \
--input_data ./datalist.txt \
--input_data_format list \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
inputfile="./inception_v3.circle"
outputfile="./inception_v3.directory.quantized.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test with directory-format input data (raw_files)
--input_path ${inputfile} \
--input_data ./raw_files \
--input_data_format directory \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
inputfile="./inception_v3.mat.q8.circle"
outputfile="./inception_v3.one-quantize_005.q8.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test with force_quantparam option
--scale 2.3 \
--zero_point 33 \
--input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
inputfile="./inception_v3.mat.q8.circle"
outputfile="./inception_v3.one-quantize_006.q8.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test with force_quantparam option (multi tensors)
--scale 2.3 \
--zero_point 33 \
--input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
inputfile="./inception_v3.circle"
outputfile="./inception_v3.random.quantized.q16.iq8.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test > /dev/null 2>&1
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
-
# run test without input data
one-quantize \
--input_dtype float32 \
--granularity channel \
--input_type uint8 \
--input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
inputfile="./inception_v3.circle"
outputfile="./inception_v3.random.quantized.q16.oq8.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test > /dev/null 2>&1
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
-
# run test without input data
one-quantize \
--input_dtype float32 \
--granularity channel \
--output_type uint8 \
--input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
inputfile="./inception_v3.circle"
outputfile="./inception_v3.random.quantized.mixed.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test > /dev/null 2>&1
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
-
# run test without input data
one-quantize \
--input_dtype float32 \
--granularity channel \
--quant_config one-quantize_009.qconf.json \
--input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
outputfile="./inception_v3.one-quantize_010.q.circle"
datafile="./inception_v3_test_data.h5"
+rm -f ${filename}.log
rm -rf ${outputfile}
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test > /dev/null 2>&1
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
-
# run test
one-quantize \
--input_dtype float32 \
outputfile="./inception_v3.one-quantize_011.q.circle"
datafile="./inception_v3_test_data.h5"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
inputfile="./inception_v3.circle"
outputfile="./inception_v3.one-quantize_012.q.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test without input data
--granularity channel \
--quant_config one-quantize_012.qconf.json \
--input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
inputfile="./inception_v3.circle"
outputfile="./inception_v3.one-quantize_013.q.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test without input data
--input_dtype float32 \
--quant_config one-quantize_013.qconf.json \
--input_path ${inputfile} \
---output_path ${outputfile} > /dev/null 2>&1
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
outputfile="./inception_v3.one-quantize_014.q.circle"
datadir="./raw_files/"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
inputfile="./inception_v3.mat.q8.circle"
outputfile="./inception_v3.one-quantize_015.fq.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-quantize with a comma-separated --input_type list (uint8,int16): the dump
+# line of the first input must show UINT8 and that of the second input INT16
+
+filename_ext="$(basename -- "$0")"
+filename="${filename_ext%.*}"
+
+# TODO Resolve circledump not found
+# https://github.com/Samsung/ONE/issues/10550
+# Until then the test reports SKIPPED (exit 0) when circledump is not on PATH
+if ! command -v circledump &> /dev/null
+then
+  echo "${filename_ext} SKIPPED"
+  exit 0
+fi
+
+# Report failure; installed as the ERR trap and also called by the explicit checks
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.circle"
+outputfile="./reshape_matmul.one-quantize_016.circle"
+
+rm -f "${filename}.log"
+rm -f "${filename}.first.cdump"
+rm -f "${filename}.second.cdump"
+rm -f "${outputfile}"
+
+# run test with different input_type
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_type uint8,int16 \
+--input_path "${inputfile}" \
+--output_path "${outputfile}" > "${filename}.log" 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+# Keep only the dump line of each model input. The patterns are single-quoted so
+# the trailing `$` is always passed to grep as the end-of-line anchor.
+circledump "${outputfile}" | grep 'serving_default_l.1:0$' > "${filename}.first.cdump"
+circledump "${outputfile}" | grep 'serving_default_r.1:0$' > "${filename}.second.cdump"
+
+# check dtype of the first input (uint8)
+if ! grep -q "UINT8" "${filename}.first.cdump"; then
+  trap_err_onexit
+fi
+
+# check dtype of the second input (int16)
+if ! grep -q "INT16" "${filename}.second.cdump"; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
outputfile="./inception_v3.quantized.circle"
rm -rf ${outputfile}
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# test begin
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
-
one-quantize \
--input_dtype float64 \
--quantized_dtype uint8 \
outputfile="./inception_v3.quantized.circle"
rm -rf ${outputfile}
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
+rm -f ${filename}.log
# run test
one-quantize \
outputfile="./inception_v3.quantized.circle"
rm -rf ${outputfile}
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3_test_data.h5"
outputfile="."
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3_test_data.h5"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3_test_data.h5"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3.circle"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3_test_data.h5"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3_test_data.h5"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3_test_data.h5"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3_test_data.h5"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3_test_data.h5"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3_test_data.h5"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
-
-# to create inception_v3.circle
-if [[ ! -s ${inputfile} ]]; then
- /bin/bash one-import_001.test >> /dev/null
- return_code=$?
- if [[ ${return_code} != 0 ]]; then
- trap_err_onexit
- fi
-fi
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./datalist.txt"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3_test_data.h5"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./datalist.txt"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-quantize \
inputdata="./inception_v3_test_data.h5"
outputfile="./inception_v3.quantized.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-quantize \
inputfile="./inception_v3.mat.q8.circle"
outputfile="./inception_v3.neg_018.q8.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-quantize \
inputfile="./inception_v3.circle"
outputfile="./inception_v3.quantized.neg_019.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-quantize \
inputfile="./inception_v3.circle"
outputfile="./inception_v3.quantized.neg_020.circle"
-rm -rf ${outputfile}.log
+rm -f ${filename}.log
# run test
one-quantize \
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Wrong number of input_type in one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# ERR handler for this negative test: the run counts as SUCCESS only when the
+# log contains the expected "Invalid number of input dtype" message.
+trap_err_onexit()
+{
+  if ! grep -q "Invalid number of input dtype" "${filename}.log"; then
+    echo "${filename_ext} FAILED"
+    exit 255
+  fi
+
+  echo "${filename_ext} SUCCESS"
+  exit 0
+}
+
+trap trap_err_onexit ERR
+
+model_in="./reshape_matmul.circle"
+model_out="./reshape_matmul.quantized.neg_021.circle"
+
+rm -f ${filename}.log
+
+# run test with wrong number of input_type
+# (three input types are passed here)
+one-quantize \
+--input_dtype float32 --quantized_dtype uint8 \
+--granularity channel --input_type uint8,int16,uint8 \
+--input_path ${model_in} --output_path ${model_out} \
+> ${filename}.log 2>&1
+
+# Reaching this point means one-quantize did not fail, which is a test failure
+echo "${filename_ext} FAILED"
+exit 255
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_001.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v2
[one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_001.circle
output_path=inception_v3.opt.circle
configfile="onecc_001.cfg"
outputfile="inception_v3.opt.circle"
+rm -f ${filename}.log
+rm -f ${outputfile}
+
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_002.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v2
[one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_002.circle
output_path=inception_v3.opt.circle
[one-pack]
configfile="onecc_002.cfg"
outputfile="inception_v3_pkg"
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_003.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v1
[one-quantize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_003.circle
output_path=inception_v3.quantized.circle
input_data=inception_v3_test_data.h5
configfile="onecc_003.cfg"
outputfile="inception_v3.quantized.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_004.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
[one-codegen]
backend=dummy
-command=-o sample.tvn inception_v3.circle
+command=-o sample.tvn inception_v3.onecc_004.circle
configfile="onecc_004.cfg"
outputfile="sample.tvn"
+rm -f ${filename}.log
rm -rf ${outputfile}
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
[one-import-tflite]
input_path=inception_v3.tflite
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_005.circle
[one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_005.circle
output_path=inception_v3.opt.circle
[one-codegen]
configfile="onecc_005.cfg"
outputfile="sample.tvn"
+rm -f ${filename}.log
rm -rf ${outputfile}
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_006.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v1
[one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_006.circle
output_path=inception_v3.opt.circle
[one-quantize]
configfile="onecc_006.cfg"
outputfile="sample.tvn"
+rm -f ${filename}.log
rm -rf ${outputfile}
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_007.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v1
[one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_007.circle
output_path=inception_v3.opt.circle
[one-quantize]
configfile="onecc_007.cfg"
outputfile="inception_v3_pkg"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="onecc_008.cfg"
outputfile="test_onnx_model.bin"
+rm -f ${filename}.log
rm -rf ${outputfile}
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="onecc_009.cfg"
outputfile="onnx_conv2d_conv2d.bin"
+rm -f ${filename}.log
rm -rf ${outputfile}
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
outputfile="inception_v3.alt.circle"
intermfile="inception_v3.alt.tflite"
+rm -f ${filename}.log
rm -rf ${outputfile}
rm -rf ${intermfile}
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
outputfile="test_onnx_model.circle"
intermfile="test_onnx_model.tflite"
+rm -f ${filename}.log
rm -rf ${outputfile}
rm -rf ${intermfile}
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_012.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v1
[one-quantize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_012.circle
output_path=inception_v3.list.quantized.circle
input_data=datalist.txt
input_data_format=list
configfile="onecc_012.cfg"
outputfile="inception_v3.list.quantized.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_13.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
trap trap_err_onexit ERR
configfile="onecc_013.cfg"
-outputfile="inception_v3.circle"
+outputfile="inception_v3.onecc_13.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc import tf -C ${configfile} > /dev/null 2>&1
+onecc import tf -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
[one-import-tflite]
input_path=inception_v3.tflite
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_014.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
trap trap_err_onexit ERR
configfile="onecc_014.cfg"
-outputfile="inception_v3.circle"
+outputfile="inception_v3.onecc_014.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc import tflite -C ${configfile} > /dev/null 2>&1
+onecc import tflite -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="onecc_015.cfg"
outputfile="bcq.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc import bcq -C ${configfile} > /dev/null 2>&1
+onecc import bcq -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="onecc_016.cfg"
outputfile="test_onnx_model.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc import onnx -C ${configfile} > /dev/null 2>&1
+onecc import onnx -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
trap_err_onexit
fi
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc optimize -i ${inputfile} -o ${outputfile} > /dev/null 2>&1
+onecc optimize -i ${inputfile} -o ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
trap_err_onexit
fi
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc quantize -i ${inputfile} -o ${outputfile} -d ${inputdata} > /dev/null 2>&1
+onecc quantize -i ${inputfile} -o ${outputfile} -d ${inputdata} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
trap_err_onexit
fi
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc pack -i ${inputfile} -o ${outputfile} > /dev/null 2>&1
+onecc pack -i ${inputfile} -o ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
trap_err_onexit
fi
+rm -f ${filename}.log
rm -rf ${outputfile}
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
# run test
-onecc codegen -b dummy -o ${outputfile} ${inputfile} > /dev/null 2>&1
+onecc codegen -b dummy -o ${outputfile} ${inputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
# copy dummy-profile to bin folder
cp dummy-profile ../bin/dummy-profile
+rm -f ${filename}.log
+
# run test
-onecc -C ${configfile} > ${filename}.log
+onecc -C ${configfile} > ${filename}.log 2>&1
rm -rf ../bin/dummy-profile
configfile="onecc_022.cfg"
outputfile="inception_v3.onecc_022.q8.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="onecc_023.cfg"
outputfile="inception_v3.onecc_023.q16.iq8.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_024.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v1
[one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_024.circle
output_path=inception_v3.opt.circle
make_batchnorm_gamma_positive=False
configfile="onecc_024.cfg"
outputfile="inception_v3.opt.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
if [ ! -d "../optimization" ]; then
clean_envir
-if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
+if ! grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
echo "${filename_ext} SUCCESS"
exit 0
fi
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_025.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v2
[one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_025.circle
output_path=inception_v3.opt.circle
configfile="onecc_001.cfg"
outputfile="inception_v3.opt.circle"
+rm -f ${filename}.log
+rm -f ${outputfile}
+
# run test
-onecc -C ${configfile} > /dev/null 2>&1
+onecc -C ${configfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
configfile="onecc_026.cfg"
outputfile="inception_v3.onecc_026.q.circle"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
one-infer=True
[one-infer]
-backend=dummy
+driver=dummy-infer
command=test_onnx_model.bin
# copy dummy-infer to bin folder
cp dummy-infer ../bin/dummy-infer
+rm -f ${filename}.log
+
# run test
-onecc -C ${configfile} > ${filename}.log
+onecc -C ${configfile} > ${filename}.log 2>&1
rm -rf ../bin/dummy-infer
workflowfile="onecc_028.workflow.json"
outputfile="inception_v3_pkg"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
"one-cmd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_028.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
"OPTIMIZE": {
"one-cmd": "one-optimize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_028.circle",
"output_path": "inception_v3.opt.circle"
}
},
outputfile="inception_v3.quantized.circle"
rm -rf ${outputfile}
+rm -f ${filename}.log
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
"one-cmd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_029.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
"QUANTIZE": {
"one-cmd": "one-quantize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_029.circle",
"output_path": "inception_v3.quantized.circle",
"input_data": "inception_v3_test_data.h5"
}
outputfile="sample.tvn"
rm -rf ${outputfile}
+rm -f ${filename}.log
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
"one-cmd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_030.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
"one-cmd": "one-codegen",
"commands": {
"backend": "dummy",
- "command": "-o sample.tvn inception_v3.circle"
+ "command": "-o sample.tvn inception_v3.onecc_030.circle"
}
}
}
outputfile="sample.tvn"
rm -rf ${outputfile}
+rm -f ${filename}.log
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
"one-cmd": "one-import-tflite",
"commands": {
"input_path": "inception_v3.tflite",
- "output_path": "inception_v3.circle"
+ "output_path": "inception_v3.onecc_031.circle"
}
},
"optimize": {
"one-cmd": "one-optimize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_031.circle",
"output_path": "inception_v3.opt.circle"
}
},
outputfile="sample.tvn"
rm -rf ${outputfile}
+rm -f ${filename}.log
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
"one-cmd": "one-import-tflite",
"commands": {
"input_path": "inception_v3.tflite",
- "output_path": "inception_v3.circle"
+ "output_path": "inception_v3.onecc_032.circle"
}
},
"optimize": {
"one-cmd": "one-optimize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_032.circle",
"output_path": "inception_v3.opt.circle"
}
},
"quantize": {
"one-cmd": "one-quantize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_032.circle",
"output_path": "inception_v3.quantized.circle",
"input_data": "inception_v3_test_data.h5"
}
outputfile="inception_v3_pkg"
rm -rf ${outputfile}
+rm -f ${filename}.log
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
"one-cmd": "one-import-tflite",
"commands": {
"input_path": "inception_v3.tflite",
- "output_path": "inception_v3.circle"
+ "output_path": "inception_v3.onecc_033.circle"
}
},
"optimize": {
"one-cmd": "one-optimize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_033.circle",
"output_path": "inception_v3.opt.circle"
}
},
"quantize": {
"one-cmd": "one-quantize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_033.circle",
"output_path": "inception_v3.quantized.circle",
"input_data": "inception_v3_test_data.h5"
}
outputfile="onnx_conv2d_conv2d.bin"
rm -rf ${outputfile}
+rm -f ${filename}.log
# copy dummy-compile to bin folder
cp dummy-compile ../bin/dummy-compile
rm -rf ${outputfile}
rm -rf ${intermfile}
+rm -f ${filename}.log
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
rm -rf ${outputfile}
rm -rf ${intermfile}
+rm -f ${filename}.log
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
outputfile="inception_v3.opt.circle"
rm -rf ${outputfile}
+rm -f ${filename}.log
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
"one-cmd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_037.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
"OPTIMIZE": {
"one-cmd": "one-optimize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_037.circle",
"output_path": "inception_v3.opt.circle"
}
}
outputfile="inception_v3.list.quantized.circle"
rm -rf ${outputfile}
+rm -f ${filename}.log
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
"one-cmd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_038.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
"QUANTIZE": {
"one-cmd": "one-quantize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_038.circle",
"output_path": "inception_v3.list.quantized.circle",
"input_data": "datalist.txt",
"input_data_format": "list"
outputfile="inception_v3.onecc_039.q.circle"
rm -rf ${outputfile}
+rm -f ${filename}.log
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_040.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v2
[one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_040.circle
output_path=inception_v3.opt.circle
outputfile="inception_v3.opt.circle"
rm -rf ${outputfile}
+rm -f ${filename}.log
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
cp dummy-inferV2 ../bin/dummy-inferV2
-rm -rf ${outputfile1} {outputfile2}
+# remove both outputs left over from a previous run
+# (the second operand was missing its '$' and named a literal file "{outputfile2}")
+rm -rf ${outputfile1} ${outputfile2}
+rm -f ${filename}.log
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
"one-cmd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_041.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
"OPTIMIZE": {
"one-cmd": "one-optimize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_041.circle",
"output_path": "inception_v3.opt.circle"
}
}
--- /dev/null
+# Only one-codegen is enabled, using the dummyEnv backend.
+# NOTE(review): presumably the config of the "one-codegen with Environment section"
+# test, which greps the generated dummy_env.bin for the SPM_SIZE value set here.
+[Environment]
+SPM_SIZE=256KB
+
+[onecc]
+one-codegen=True
+
+[one-codegen]
+backend=dummyEnv
+command=dummy_env.bin
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-codegen with Environment section:
+# the generated output must contain the SPM_SIZE value.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+outputfile="dummy_env.bin"
+
+# Remove the driver copied into the bin folder and the generated output.
+remove_artifacts()
+{
+  rm -rf ../bin/dummyEnv-compile
+  rm -rf ${outputfile}
+}
+
+# Report failure, clean up and abort. Also installed as the ERR trap,
+# so any failing top-level command ends up here.
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  remove_artifacts
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_042.cfg"
+
+# start from a clean state
+rm -rf ${outputfile}
+rm -rf ${filename}.log
+
+# copy dummyEnv-compile to bin folder
+cp dummyEnv-compile ../bin/dummyEnv-compile
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+# the output must exist, be non-empty and carry the environment value
+[[ -s "${outputfile}" ]] || trap_err_onexit
+grep -q "SPM_SIZE=256KB" "${outputfile}" || trap_err_onexit
+
+echo "${filename_ext} SUCCESS"
+remove_artifacts
+exit 0
--- /dev/null
+# Runs only the one-optimize step on an existing circle model.
+# NOTE(review): `include=O1` presumably pulls in the O1 optimization group - confirm.
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=O1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.onecc_043.opt.circle
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test for "O1=True" option in onecc config file
+#
+# After the run, every option listed in O1.list must appear in the optimize log
+# and no option listed in non-O1.list may appear in it.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# Report failure and abort; also installed as the ERR trap.
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_043.cfg"
+outputfile="inception_v3.onecc_043.opt.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+# O1.list is dynamically created from onelib/export_constant.py
+readarray -t O1_OPTS < "O1.list"
+readarray -t NO_O1_OPTS < "non-O1.list"
+
+# NOTE(review): the options are looked up in ${outputfile}.log, not ${filename}.log -
+# presumably the log that one-optimize writes next to its output; confirm.
+for opt in "${O1_OPTS[@]}"
+do
+  # skip blank entries: unquoted they made grep take the log name as the pattern
+  # and read stdin; quoted, an empty pattern would match every line
+  if [[ -z "${opt}" ]]; then
+    continue
+  fi
+  if ! grep -q -e "${opt}" "${outputfile}.log"; then
+    trap_err_onexit
+  fi
+done
+
+for no_opt in "${NO_O1_OPTS[@]}"
+do
+  if [[ -z "${no_opt}" ]]; then
+    continue
+  fi
+  if grep -q -e "${no_opt}" "${outputfile}.log"; then
+    trap_err_onexit
+  fi
+done
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+# Runs one-import-onnx followed by one-optimize.
+# The [one-optimize] section sets two options explicitly next to `include=O1`:
+# convert_nchw_to_nhwc is switched on and fold_add_v2 is switched off.
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=O1
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.onecc_044.opt.circle
+convert_nchw_to_nhwc=True
+fold_add_v2=False
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test for "O1=True" option with other options
+#
+# The expected option lists come from O1.list / non-O1.list and are then adjusted:
+# convert_nchw_to_nhwc must appear in the optimize log, fold_add_v2 must not.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# Report failure and abort; also installed as the ERR trap.
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_044.cfg"
+outputfile="test_onnx_model.onecc_044.opt.circle"
+
+rm -f ${filename}.log
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+readarray -t OPTS < "O1.list"
+readarray -t NO_OPTS < "non-O1.list"
+
+# convert_nchw_to_nhwc is expected: add it to OPTS and drop it from NO_OPTS
+OPTS+=("convert_nchw_to_nhwc")
+for i in "${!NO_OPTS[@]}"; do
+  if [[ ${NO_OPTS[i]} = "convert_nchw_to_nhwc" ]]; then
+    unset 'NO_OPTS[i]'
+  fi
+done
+
+# fold_add_v2 is not expected: add it to NO_OPTS and drop it from OPTS
+NO_OPTS+=("fold_add_v2")
+for i in "${!OPTS[@]}"; do
+  if [[ ${OPTS[i]} = "fold_add_v2" ]]; then
+    unset 'OPTS[i]'
+  fi
+done
+
+# NOTE(review): the options are looked up in ${outputfile}.log, not ${filename}.log -
+# presumably the log that one-optimize writes next to its output; confirm.
+for opt in "${OPTS[@]}"
+do
+  # skip blank entries: unquoted they made grep take the log name as the pattern
+  # and read stdin; quoted, an empty pattern would match every line
+  if [[ -z "${opt}" ]]; then
+    continue
+  fi
+  if ! grep -q -e "${opt}" "${outputfile}.log"; then
+    trap_err_onexit
+  fi
+done
+
+for no_opt in "${NO_OPTS[@]}"
+do
+  if [[ -z "${no_opt}" ]]; then
+    continue
+  fi
+  if grep -q -e "${no_opt}" "${outputfile}.log"; then
+    trap_err_onexit
+  fi
+done
+
+echo "${filename_ext} SUCCESS"
--- /dev/null
+# Runs only one-quantize on reshape_matmul.circle.
+# input_type carries one comma-separated entry per model input
+# (NOTE(review): presumably in input order - confirm against one-quantize).
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=reshape_matmul.circle
+output_path=reshape_matmul.onecc_045.q.circle
+input_type=uint8,int16
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# TODO Resolve circledump not found
+# https://github.com/Samsung/ONE/issues/10550
+# Without circledump the dtype checks below cannot run, so the test is skipped.
+command -v circledump &> /dev/null || {
+  echo "${filename_ext} SKIPPED"
+  exit 0
+}
+
+# Report failure and abort; also installed as the ERR trap.
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.circle"
+configfile="onecc_045.cfg"
+outputfile="reshape_matmul.onecc_045.q.circle"
+
+# remove artifacts of a previous run
+rm -f ${filename}.log
+rm -f ${filename}.first.cdump
+rm -f ${filename}.second.cdump
+rm -f ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+[[ -s "${outputfile}" ]] || trap_err_onexit
+
+# keep only the dump lines of the two model inputs
+circledump ${outputfile} | grep serving_default_l.1:0$ > ${filename}.first.cdump
+circledump ${outputfile} | grep serving_default_r.1:0$ > ${filename}.second.cdump
+
+# check dtype of the first input (uint8)
+grep -q "UINT8" "${filename}.first.cdump" || trap_err_onexit
+
+# check dtype of the second input (int16)
+grep -q "INT16" "${filename}.second.cdump" || trap_err_onexit
+
+echo "${filename_ext} SUCCESS"
configfile="onecc_neg_001.cfg"
+rm -f ${filename}.log
+
# run test
onecc -C ${configfile} > ${filename}.log 2>&1
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_neg_002.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v2
[one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_neg_002.circle
output_path=inception_v3.opt.circle
configfile="onecc_neg_002.cfg"
+rm -f ${filename}.log
+
# run test
onecc -C ${configfile} > ${filename}.log 2>&1
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_neg_003.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v2
[one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_neg_003.circle
output_path=inception_v3.opt.circle
[one-pack]
configfile="onecc_neg_003.cfg"
+rm -f ${filename}.log
+
# run test
onecc -C ${configfile} > ${filename}.log 2>&1
[one-import-tf]
input_path=inception_v3.pb
-output_path=inception_v3.circle
+output_path=inception_v3.onecc_neg_004.circle
input_arrays=input
input_shapes=1,299,299,3
output_arrays=InceptionV3/Predictions/Reshape_1
converter_version=v2
[one-optimize]
-input_path=inception_v3.circle
+input_path=inception_v3.onecc_neg_004.circle
output_path=inception_v3.opt.circle
[one-optimize]
configfile="onecc_neg_004.cfg"
+rm -f ${filename}.log
+
# run test
onecc -C ${configfile} > ${filename}.log 2>&1
rm -rf ${outputfile}
rm -rf ${intermfile}
+rm -f ${filename}.log
# run test
onecc -C ${configfile} > ${filename}.log 2>&1
rm -rf ${outputfile}
rm -rf ${intermfile}
+rm -f ${filename}.log
# run test
onecc -C ${configfile} > ${filename}.log 2>&1
trap trap_err_onexit ERR
+rm -f ${filename}.log
+
# run test
onecc wronginput > ${filename}.log 2>&1
trap trap_err_onexit ERR
+rm -f ${filename}.log
+
# run test
onecc > ${filename}.log 2>&1
configfile=".."
+rm -f ${filename}.log
+
# run test
onecc -C ${configfile} -OONECC_NEG_009 > ${filename}.log 2>&1
configfile=".."
+rm -f ${filename}.log
+
# run test
onecc -C ${configfile} -OONECC_NEG_010 > ${filename}.log 2>&1
configfile="onecc_neg_011.cfg"
+rm -f ${filename}.log
+
# run test
onecc -C ${configfile} > ${filename}.log 2>&1
[one-infer]
driver=dummy-infer
-backend=dummy
command="dummy arguments"
# See the License for the specific language governing permissions and
# limitations under the License.
-# Check driver and backend option is mutually exclusive
+# Check the case when driver does not exist
filename_ext="$(basename -- $0)"
filename="${filename_ext%.*}"
trap_err_onexit()
{
- if grep -q "\-d and -b options are mutually exclusive" "${filename}.log"; then
+ if grep -q "dummy-infer not found" "${filename}.log"; then
echo "${filename_ext} SUCCESS"
exit 0
fi
configfile="onecc_neg_012.cfg"
+rm -f ${filename}.log
+
# run test
onecc -C ${configfile} > ${filename}.log 2>&1
workflowfile="onecc_neg_013.workflow.json"
+rm -f ${filename}.log
+
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
workflowfile="onecc_neg_014.workflow.json"
+rm -f ${filename}.log
+
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
workflowfile="onecc_neg_015.workflow.json"
+rm -f ${filename}.log
+
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
workflowfile="onecc_neg_016.workflow.json"
+rm -f ${filename}.log
+
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
workflowfile="onecc_neg_017.workflow.json"
+rm -f ${filename}.log
+
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
workflowfile="onecc_neg_018.workflow.json"
+rm -f ${filename}.log
+
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
"one-cmd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_neg_018.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
workflowfile="onecc_neg_019.workflow.json"
+rm -f ${filename}.log
+
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
"one-cmddddddddd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_neg_019.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
workflowfile="onecc_neg_020.workflow.json"
+rm -f ${filename}.log
+
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
"one-cmd": "one-import-tf",
"commandssssssssss": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_neg_020.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
workflowfile="onecc_neg_021.workflow.json"
+rm -f ${filename}.log
+
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
"one-cmd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_neg_021.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
"one-cmd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_neg_021.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
workflowfile="onecc_neg_022.workflow.json"
+rm -f ${filename}.log
+
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
"one-cmd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_neg_022.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
"OPTIMIZE": {
"one-cmd": "one-optimize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_neg_022.circle",
"output_path": "inception_v3.opt.circle"
}
}
workflowfile="onecc_neg_023.workflow.json"
+rm -f ${filename}.log
+
# run test
onecc -W ${workflowfile} > ${filename}.log 2>&1
"one-cmd": "one-import-tf",
"commands": {
"input_path": "inception_v3.pb",
- "output_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_neg_023.circle",
"input_arrays": "input",
"input_shapes": "1,299,299,3",
"output_arrays": "InceptionV3/Predictions/Reshape_1",
"OPTIMIZE": {
"one-cmd": "one-optimize",
"commands": {
- "input_path": "inception_v3.circle",
+ "input_path": "inception_v3.onecc_neg_023.circle",
"output_path": "inception_v3.opt.circle",
"change_outputs": "non_existing_node_name"
}
--- /dev/null
+# Negative-test config: the `include` value below is deliberately invalid.
+# NOTE(review): Python's configparser does not strip inline `#` comments by default,
+# so the parsed value may be the whole "O # invalid (...)" string rather than "O".
+# Confirm which parser reads this file and that the "too short" case is still exercised.
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=O # invalid (too short group option)
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.onecc_neg_024.opt.circle
+convert_nchw_to_nhwc=True
+fold_add_v2=False
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# invalid group option
+# (negative test: onecc has to fail and log "Invalid group option")
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# ERR trap: the failing onecc run is the expected path. The test succeeds only
+# when the log contains the expected error message.
+trap_err_onexit()
+{
+  if ! grep -q "Invalid group option" "${filename}.log"; then
+    echo "${filename_ext} FAILED"
+    exit 255
+  fi
+
+  echo "${filename_ext} SUCCESS"
+  exit 0
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_024.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+# reaching this point means onecc did not fail, which is an error for this test
+echo "${filename_ext} FAILED"
+exit 255
--- /dev/null
+# Negative-test config: the `include` value below is deliberately invalid.
+# NOTE(review): Python's configparser does not strip inline `#` comments by default -
+# confirm the parser sees the intended value "A1".
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=True
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+include=A1 # invalid (must start with 'O')
+
+[one-import-onnx]
+input_path=test_onnx_model.onnx
+output_path=test_onnx_model.circle
+
+[one-optimize]
+input_path=test_onnx_model.circle
+output_path=test_onnx_model.onecc_neg_025.opt.circle
+convert_nchw_to_nhwc=True
+fold_add_v2=False
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# invalid group option
+# (negative test: onecc has to fail and log "Invalid group option")
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# ERR trap: the failing onecc run is the expected path. The test succeeds only
+# when the log contains the expected error message.
+trap_err_onexit()
+{
+  if ! grep -q "Invalid group option" "${filename}.log"; then
+    echo "${filename_ext} FAILED"
+    exit 255
+  fi
+
+  echo "${filename_ext} SUCCESS"
+  exit 0
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_025.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+# reaching this point means onecc did not fail, which is an error for this test
+echo "${filename_ext} FAILED"
+exit 255
--- /dev/null
+# Negative-test config: runs only one-quantize and passes three input_type entries.
+# The output path carries this test's own id so that an unexpected success cannot
+# overwrite the artifact of another test.
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=reshape_matmul.circle
+output_path=reshape_matmul.onecc_neg_026.q.circle
+input_type=uint8,int16,uint8
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Wrong number of input_type in one-quantize
+# (negative test: onecc has to fail and log "Invalid number of input dtype")
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# ERR trap: the failing onecc run is the expected path. The test succeeds only
+# when the log contains the expected error message.
+trap_err_onexit()
+{
+  if ! grep -q "Invalid number of input dtype" "${filename}.log"; then
+    echo "${filename_ext} FAILED"
+    exit 255
+  fi
+
+  echo "${filename_ext} SUCCESS"
+  exit 0
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.circle"
+configfile="onecc_neg_026.cfg"
+
+rm -f ${filename}.log
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+# reaching this point means onecc did not fail, which is an error for this test
+echo "${filename_ext} FAILED"
+exit 255
if [[ ! -s "inception_v3.pb" ]]; then
rm -rf inception_v3_2018_04_27.tgz
- wget https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v3_2018_04_27.tgz
+ wget -nv https://storage.googleapis.com/download.tensorflow.org/models/tflite/model_zoo/upload_20180427/inception_v3_2018_04_27.tgz
tar zxvf inception_v3_2018_04_27.tgz
fi
if [[ ! -s "while_3.pbtxt" ]]; then
rm -rf while_3.zip
- wget https://github.com/Samsung/ONE/files/5095630/while_3.zip
+ wget -nv https://github.com/Samsung/ONE/files/5095630/while_3.zip
unzip while_3.zip
# https://github.com/Samsung/ONE/issues/4155#issuecomment-689320297
fi
if [[ ! -s "mobilenet_test_data.h5" ]]; then
rm -rf mobilenet_test_data.zip
- wget https://github.com/Samsung/ONE/files/5139460/mobilenet_test_data.zip
+ wget -nv https://github.com/Samsung/ONE/files/5139460/mobilenet_test_data.zip
unzip mobilenet_test_data.zip
# https://github.com/Samsung/ONE/issues/4155#issuecomment-689321538
fi
if [[ ! -s "bcq.pb" ]]; then
rm -rf bcq.pb.zip
- wget https://github.com/Samsung/ONE/files/5153842/bcq.pb.zip
+ wget -nv https://github.com/Samsung/ONE/files/5153842/bcq.pb.zip
unzip bcq.pb.zip
# https://github.com/Samsung/ONE/issues/4155#issuecomment-689324597
fi
if [[ ! -s "img_files" ]]; then
rm -rf img_files.zip
- wget https://github.com/Samsung/ONE/files/5499172/img_files.zip
+ wget -nv https://github.com/Samsung/ONE/files/5499172/img_files.zip
unzip img_files.zip
# https://github.com/Samsung/ONE/issues/3213#issuecomment-722757499
fi
if [[ ! -d "test_saved_model" ]]; then
rm -rf test_saved_model.zip
- wget https://github.com/Samsung/ONE/files/5516226/test_saved_model.zip
+ wget -nv https://github.com/Samsung/ONE/files/5516226/test_saved_model.zip
unzip test_saved_model.zip
# https://github.com/Samsung/ONE/issues/4268#issuecomment-724578237
fi
if [[ ! -s "test_keras_model.h5" ]]; then
rm -rf test_keras_model.zip
- wget https://github.com/Samsung/ONE/files/5520777/test_keras_model.zip
+ wget -nv https://github.com/Samsung/ONE/files/5520777/test_keras_model.zip
unzip test_keras_model.zip
# https://github.com/Samsung/ONE/issues/4268#issuecomment-725025805
fi
if [[ ! -s "test_onnx_model.onnx" ]]; then
rm -rf test_onnx_model.zip
- wget https://github.com/Samsung/ONE/files/5768243/test_onnx_model.zip
+ wget -nv https://github.com/Samsung/ONE/files/5768243/test_onnx_model.zip
unzip test_onnx_model.zip
# https://github.com/Samsung/ONE/issues/5548#issuecomment-754373360
fi
if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then
rm -rf onnx_conv2d_conv2d.zip
- wget https://github.com/Samsung/ONE/files/5774648/onnx_conv2d_conv2d.zip
+ wget -nv https://github.com/Samsung/ONE/files/5774648/onnx_conv2d_conv2d.zip
unzip onnx_conv2d_conv2d.zip
# https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444
fi
if [[ ! -s "reshape_matmul.onnx" ]]; then
rm -rf reshape_matmul.zip
- wget https://github.com/Samsung/ONE/files/9082878/reshape_matmul.zip
+ wget -nv https://github.com/Samsung/ONE/files/9082878/reshape_matmul.zip
unzip reshape_matmul.zip
# https://github.com/Samsung/ONE/issues/9405#issuecomment-1180198137
fi
+# prepare 'reshape_matmul.circle' file used for tests
+if [[ ! -s "reshape_matmul.circle" ]]; then
+ ../bin/one-import onnx \
+ --experimental_disable_batchmatmul_unfold \
+ -i reshape_matmul.onnx \
+ -o reshape_matmul.circle
+fi
+
if [[ ! -s "Net_InstanceNorm_003.part" ]]; then
rm -rf Net_InstanceNorm_003.zip
- wget https://github.com/Samsung/ONE/files/8608844/Net_InstanceNorm_003.zip
+ wget -nv https://github.com/Samsung/ONE/files/8608844/Net_InstanceNorm_003.zip
unzip Net_InstanceNorm_003.zip
# https://github.com/Samsung/ONE/issues/8570#issuecomment-1115804257
fi
+if [[ ! -s "UnidirSeqLSTM.tflite" ]]; then
+ rm -rf UnidirSeqLSTM.zip
+ wget -nv https://github.com/Samsung/ONE/files/10055255/UnidirSeqLSTM.zip
+ unzip UnidirSeqLSTM.zip
+ # https://github.com/Samsung/ONE/issues/9940#issuecomment-1293282484
+fi
+
function files_missing() {
condition="test "
if files_missing "${TEST_RECCURENT_MODELS[@]}"; then
rm -rf test_onnx_recurrent_models.zip
- wget https://github.com/Samsung/ONE/files/8067909/test_onnx_recurrent_models.zip
+ wget -nv https://github.com/Samsung/ONE/files/8067909/test_onnx_recurrent_models.zip
unzip test_onnx_recurrent_models.zip
# https://github.com/Samsung/ONE/issues/8395#issuecomment-1040072097
fi
if files_missing "${NEG_TEST_RECCURENT_MODELS[@]}"; then
rm -rf neg_test_onnx_recurrent_models.zip
- wget https://github.com/Samsung/ONE/files/8137183/neg_test_onnx_recurrent_models.zip
+ wget -nv https://github.com/Samsung/ONE/files/8137183/neg_test_onnx_recurrent_models.zip
unzip neg_test_onnx_recurrent_models.zip
# https://github.com/Samsung/ONE/issues/8395#issuecomment-1050364375
fi
outputfile="./output_testdata.h5"
+rm -f ${filename}.log
rm -rf ${outputfile}
# run test
rawdata2hdf5 \
--data_list datalist.txt \
---output_path ./output_testdata.h5 >> /dev/null
+--output_path ${outputfile} > ${filename}.log 2>&1
if [[ ! -s "${outputfile}" ]]; then
trap_err_onexit
outputfile="./output_testdata.h5"
rm -rf ${inputfile}
+rm -f ${filename}.log
touch ${inputfile}
echo "non-existing-file.data" >> ${inputfile}
inputfile="./inception_v3.circle"
outputfile="./output_testdata.h5"
+rm -f ${filename}.log
+
# run test
rawdata2hdf5 \
--data_list ${inputfile} \
outputfile="./output_testdata.h5"
+rm -f ${filename}.log
+
# run test
rawdata2hdf5 \
--output_path ${outputfile} > ${filename}.log 2>&1
outputfile="./non_existing_dir/output_testdata.h5"
+rm -f ${filename}.log
+
# run test
rawdata2hdf5 \
--data_list datalist.txt \
+++ /dev/null
-#!/usr/bin/env python
-
-# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import configparser
-import glob
-import importlib
-import ntpath
-import os
-import subprocess
-import sys
-
-import onelib.constant as _constant
-
-
-def _add_default_arg(parser):
- # version
- parser.add_argument(
- '-v',
- '--version',
- action='store_true',
- help='show program\'s version number and exit')
-
- # verbose
- parser.add_argument(
- '-V',
- '--verbose',
- action='store_true',
- help='output additional information to stdout or stderr')
-
- # configuration file
- parser.add_argument('-C', '--config', type=str, help='run with configuation file')
- # section name that you want to run in configuration file
- parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS)
-
-
-def _add_default_arg_no_CS(parser):
- """
- This adds -v -V args only (no -C nor -S)
- """
- # version
- parser.add_argument(
- '-v',
- '--version',
- action='store_true',
- help='show program\'s version number and exit')
-
- # verbose
- parser.add_argument(
- '-V',
- '--verbose',
- action='store_true',
- help='output additional information to stdout or stderr')
-
-
-def is_accumulated_arg(arg, driver):
- if driver == "one-quantize":
- accumulables = [
- "tensor_name", "scale", "zero_point", "src_tensor_name", "dst_tensor_name"
- ]
- if arg in accumulables:
- return True
-
- return False
-
-
-def _is_valid_attr(args, attr):
- return hasattr(args, attr) and getattr(args, attr)
-
-
-class Command:
- def __init__(self, driver, args, log_file):
- self.cmd = [driver]
- self.driver = driver
- self.args = args
- self.log_file = log_file
-
- # Add option if attrs are valid
- # Option values are collected from self.args
- def add_option_with_valid_args(self, option, attrs):
- for attr in attrs:
- if not _is_valid_attr(self.args, attr):
- return self
- self.cmd.append(option)
- for attr in attrs:
- self.cmd.append(getattr(self.args, attr))
- return self
-
- # Add option and values without any condition
- def add_option_with_values(self, option, values):
- self.cmd.append(option)
- for value in values:
- self.cmd.append(value)
- return self
-
- # Add option with no argument (ex: --verbose) if attr is valid
- def add_noarg_option_if_valid_arg(self, option, attr):
- if _is_valid_attr(self.args, attr):
- self.cmd.append(option)
- return self
-
- # Run cmd and save logs
- def run(self):
- self.log_file.write((' '.join(self.cmd) + '\n').encode())
- _run(self.cmd, err_prefix=self.driver, logfile=self.log_file)
-
-
-def _parse_cfg_and_overwrite(config_path, section, args):
- """
- parse given section of configuration file and set the values of args.
- Even if the values parsed from the configuration file already exist in args,
- the values are overwritten.
- """
- if config_path == None:
- # DO NOTHING
- return
- config = configparser.ConfigParser()
- # make option names case sensitive
- config.optionxform = str
- parsed = config.read(config_path)
- if not parsed:
- raise FileNotFoundError('Not found given configuration file')
- if not config.has_section(section):
- raise AssertionError('configuration file doesn\'t have \'' + section +
- '\' section')
- for key in config[section]:
- setattr(args, key, config[section][key])
- # TODO support accumulated arguments
-
-
-def _parse_cfg(args, driver_name):
- """parse configuration file. If the option is directly given to the command line,
- the option is processed prior to the configuration file.
- That is, if the values parsed from the configuration file already exist in args,
- the values are ignored."""
- if _is_valid_attr(args, 'config'):
- config = configparser.ConfigParser()
- config.optionxform = str
- config.read(args.config)
- # if section is given, verify given section
- if _is_valid_attr(args, 'section'):
- if not config.has_section(args.section):
- raise AssertionError('configuration file must have \'' + driver_name +
- '\' section')
- for key in config[args.section]:
- if is_accumulated_arg(key, driver_name):
- if not _is_valid_attr(args, key):
- setattr(args, key, [config[args.section][key]])
- else:
- getattr(args, key).append(config[args.section][key])
- continue
- if not _is_valid_attr(args, key):
- setattr(args, key, config[args.section][key])
- # if section is not given, section name is same with its driver name
- else:
- if not config.has_section(driver_name):
- raise AssertionError('configuration file must have \'' + driver_name +
- '\' section')
- secton_to_run = driver_name
- for key in config[secton_to_run]:
- if is_accumulated_arg(key, driver_name):
- if not _is_valid_attr(args, key):
- setattr(args, key, [config[secton_to_run][key]])
- else:
- getattr(args, key).append(config[secton_to_run][key])
- continue
- if not _is_valid_attr(args, key):
- setattr(args, key, config[secton_to_run][key])
-
-
-def _print_version_and_exit(file_path):
- """print version of the file located in the file_path"""
- script_path = os.path.realpath(file_path)
- dir_path = os.path.dirname(script_path)
- script_name = os.path.splitext(os.path.basename(script_path))[0]
- # run one-version
- subprocess.call([os.path.join(dir_path, 'one-version'), script_name])
- sys.exit()
-
-
-def _safemain(main, mainpath):
- """execute given method and print with program name for all uncaught exceptions"""
- try:
- main()
- except Exception as e:
- prog_name = os.path.basename(mainpath)
- print(f"{prog_name}: {type(e).__name__}: " + str(e), file=sys.stderr)
- sys.exit(255)
-
-
-def _run(cmd, err_prefix=None, logfile=None):
- """Execute command in subprocess
-
- Args:
- cmd: command to be executed in subprocess
- err_prefix: prefix to be put before every stderr lines
- logfile: file stream to which both of stdout and stderr lines will be written
- """
- with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
- import select
- inputs = set([p.stdout, p.stderr])
- while inputs:
- readable, _, _ = select.select(inputs, [], [])
- for x in readable:
- line = x.readline()
- if len(line) == 0:
- inputs.discard(x)
- continue
- if x == p.stdout:
- out = sys.stdout
- if x == p.stderr:
- out = sys.stderr
- if err_prefix:
- line = f"{err_prefix}: ".encode() + line
- out.buffer.write(line)
- out.buffer.flush()
- if logfile != None:
- logfile.write(line)
- if p.returncode != 0:
- sys.exit(p.returncode)
-
-
-def _remove_prefix(str, prefix):
- if str.startswith(prefix):
- return str[len(prefix):]
- return str
-
-
-def _remove_suffix(str, suffix):
- if str.endswith(suffix):
- return str[:-len(suffix)]
- return str
-
-
-def _get_optimization_list(get_name=False):
- """
- returns a list of optimization. If `get_name` is True,
- only basename without extension is returned rather than full file path.
-
- [one hierarchy]
- one
- ├── backends
- ├── bin
- ├── doc
- ├── include
- ├── lib
- ├── optimization
- └── test
-
- Optimization options must be placed in `optimization` folder
- """
- dir_path = os.path.dirname(os.path.realpath(__file__))
-
- # optimization folder
- files = [f for f in glob.glob(dir_path + '/../optimization/O*.cfg', recursive=True)]
- # exclude if the name has space
- files = [s for s in files if not ' ' in s]
-
- opt_list = []
- for cand in files:
- base = ntpath.basename(cand)
- if os.path.isfile(cand) and os.access(cand, os.R_OK):
- opt_list.append(cand)
-
- if get_name == True:
- # NOTE the name includes prefix 'O'
- # e.g. O1, O2, ONCHW not just 1, 2, NCHW
- opt_list = [ntpath.basename(f) for f in opt_list]
- opt_list = [_remove_suffix(s, '.cfg') for s in opt_list]
-
- return opt_list
-
-
-def _detect_one_import_drivers(search_path):
- """Looks for import drivers in given directory
-
- Args:
- search_path: path to the directory where to search import drivers
-
- Returns:
- dict: each entry is related to single detected driver,
- key is a config section name, value is a driver name
-
- """
- import_drivers_dict = {}
- for module_name in os.listdir(search_path):
- full_path = os.path.join(search_path, module_name)
- if not os.path.isfile(full_path):
- continue
- if module_name.find("one-import-") != 0:
- continue
- module_loader = importlib.machinery.SourceFileLoader(module_name, full_path)
- module_spec = importlib.util.spec_from_loader(module_name, module_loader)
- module = importlib.util.module_from_spec(module_spec)
- try:
- module_loader.exec_module(module)
- if hasattr(module, "get_driver_cfg_section"):
- section = module.get_driver_cfg_section()
- import_drivers_dict[section] = module_name
- except:
- pass
- return import_drivers_dict
--- /dev/null
+# onecc-docker
+
+_onecc-docker_ broadens ONE tools to be used in other platforms.
+
+## Description
+
+For now, ONE tools only support Ubuntu 18.04 and, unofficially, Ubuntu 20.04.
+So, it is difficult for people in other environments to use our tools without switching to Ubuntu 18.04.
+
+To overcome this limitation, we provide _onecc-docker_ that runs using a Docker so that users can use ONE tools more widely.
+
+This tool aims at the following objectives.
+
+- Make ONE tools usable on operating systems that are not officially supported
+- Install and use ONE tools lightly and quickly using Docker
+
+## Requirements
+
+- Any Linux distribution
+- Docker
+ - Requires root privileges.
+ - _onecc-docker_ requires the current `user ID` to be included in the `Docker group` because it requires the Docker-related commands to be executed without `sudo` privileges.
+ - See "[Post-installation steps for Linux](https://docs.docker.com/engine/install/linux-postinstall/)"
+- Python 3.8
+ - requests
+
+## Note
+
+_onecc-docker_ is currently in incubation stage.
+
+The onecc-docker debian package should be created with one-compiler debian package when ONE
+compiler project builds. To this end, it is correct to configure the onecc-docker debian codes in
+./infra/debian/compiler directory. However, we are currently working on the code, so we will
+temporarily implement it in this location.
+
+TODO: Merge this debian directory into ./infra/debian/compiler code.
--- /dev/null
+onecc-docker (0.1.0) bionic; urgency=medium
+
+ * Introduce onecc-docker
+
+ -- Seunghui Lee <dltmdgml456654@gmail.com>  Fri, 23 Sep 2022 12:00:00 +0900
+
--- /dev/null
+Source: onecc-docker
+Section: devel
+Priority: extra
+Maintainer: Neural Network Acceleration Solution Developers <nnfw@samsung.com>
+Build-Depends: debhelper (>=9)
+Standards-Version: 4.5.1
+Homepage: https://github.com/Samsung/ONE
+
+Package: onecc-docker
+Architecture: amd64
+Multi-Arch: foreign
+Depends: ${misc:Depends}, ${shlibs:Depends}, python3.8
+Description: On-device Neural Engine docker package
--- /dev/null
+Files: *
+License: Proprietary
+Copyright (c) <2022> <Samsung Electronics Co.,Ltd.>
--- /dev/null
+compiler/onecc-docker/onecc-docker /usr/share/one/bin/
+compiler/onecc-docker/docker/Dockerfile /usr/share/one/bin/docker/
--- /dev/null
+/usr/share/one/bin/onecc-docker /usr/bin/onecc-docker
--- /dev/null
+#!/usr/bin/make -f
+
+%:
+ dh $@
+
+override_dh_fixperms:
+ dh_fixperms
+ chmod +x debian/onecc-docker/usr/share/one/bin/onecc-docker
--- /dev/null
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+
+FROM ubuntu:18.04
+
+ARG VERSION
+
+# Install the one-compiler release package for ${VERSION}.
+# Everything happens in a single RUN so that the downloaded .deb and the apt
+# package lists are deleted in the same layer and do not bloat the image.
+RUN apt-get update && apt-get install -qqy --no-install-recommends \
+    wget \
+    ca-certificates \
+    && wget https://github.com/Samsung/ONE/releases/download/${VERSION}/one-compiler_${VERSION}_amd64.deb \
+    && apt-get install -y ./one-compiler_${VERSION}_amd64.deb \
+    && rm -f ./one-compiler_${VERSION}_amd64.deb \
+    && rm -rf /var/lib/apt/lists/*
+
+ENTRYPOINT ["onecc"]
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import subprocess
+import json
+import requests
+import os
+import argparse
+
+
+def _run(cmd, is_shell=False):
+ result = subprocess.Popen(
+ cmd, shell=is_shell, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ stdout, stderr = result.communicate()
+ stdout = stdout.decode('utf-8')
+ stderr = stderr.decode('utf-8')
+
+ if result.returncode:
+ print(stderr, end='')
+ exit(result.returncode)
+ else:
+ return stdout
+
+
+def _image_exists(name):
+ cmd = ['docker', 'images', '-q', name]
+ lines = _run(cmd).splitlines()
+ return lines
+
+
+def main():
+ script_path = os.path.dirname(os.path.realpath(__file__))
+ dockerfile_path = os.path.join(script_path, 'docker')
+
+ onecc_docker_usage = 'onecc-docker [-h] [-t TOKEN] [COMMAND <args>]'
+ onecc_docker_desc = 'Run onecc via docker'
+ parser = argparse.ArgumentParser(
+ usage=onecc_docker_usage, description=onecc_docker_desc)
+ parser.add_argument(
+ "-t",
+ "--token",
+ help=
+ "Token for authentication to GitHub. This is a workaround for Rate limit exceeded error"
+ )
+
+ args, onecc_arguments = parser.parse_known_args()
+ authorization_token = args.token
+
+ LATEST_URL = "https://api.github.com/repos/Samsung/ONE/releases/latest"
+ headers = {}
+ if authorization_token:
+ headers = {"Authorization": "Bearer {}".format(authorization_token)}
+ try:
+ response = requests.get(LATEST_URL, headers=headers)
+ response.raise_for_status()
+ except requests.exceptions.RequestException as e:
+ raise SystemExit('onecc-docker: error: {}'.format(e))
+
+ versions_str = response.content
+ versions_json = json.loads(versions_str)
+ recent_version = versions_json["tag_name"]
+
+ image_name = f"onecc:{recent_version}"
+ build_arg = f"VERSION={recent_version}"
+
+ if not _image_exists(image_name):
+ build_cmd = [
+ "docker", "build", "-t", image_name, "--build-arg", build_arg, dockerfile_path
+ ]
+ print('build Docker image ...')
+ _run(build_cmd)
+ print('Dockerfile successfully built.')
+
+ contianer_name = f"onecc_{recent_version.replace('.','_')}"
+ user_cmd = ' '.join(onecc_arguments)
+
+ run_cmd = [
+ "docker", "run", "--rm", "-u", "$(id -u):$(id -g)", "--name", contianer_name,
+ "-v", "${HOME}:${HOME}", "-e", "HOME=${HOME}", "-w", "${PWD}", image_name,
+ user_cmd
+ ]
+
+ cmd = ' '.join(run_cmd)
+ output = _run(cmd, is_shell=True)
+ print(output, end='')
+
+
+if __name__ == "__main__":
+ try:
+ main()
+ except Exception as e:
+ prog_name = os.path.basename(__file__)
+ print(f"{prog_name}: {type(e).__name__}: " + str(e), file=sys.stderr)
+ sys.exit(255)
--- /dev/null
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "Configure pics: FAILED (missing FlatBuffers)")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+unset(PICS_DEPS)
+
+###
+### Generate python interface for circle schema
+###
+set(CIRCLE_SCHEMA_PYTHON_DIR "${CMAKE_CURRENT_BINARY_DIR}/circle")
+
+get_target_property(SCHEMA_BIN_PATH mio_circle04 BINARY_DIR)
+
+# Run flatc in Python mode on schema.fbs taken from mio_circle04's binary directory
+# (SCHEMA_BIN_PATH). Generated files are written under ${CMAKE_CURRENT_BINARY_DIR};
+# the declared OUTPUT is its "circle" sub-directory (presumably the package flatc
+# derives from the schema namespace - TODO confirm).
+# NOTE(review): only flatc is listed in DEPENDS, so a change to schema.fbs alone
+# does not appear to re-run this command - confirm whether that is intended.
+add_custom_command(
+ OUTPUT ${CIRCLE_SCHEMA_PYTHON_DIR}
+ COMMAND "$<TARGET_FILE:flatbuffers::flatc>" --python
+ -o "${CMAKE_CURRENT_BINARY_DIR}" "${SCHEMA_BIN_PATH}/schema.fbs"
+ DEPENDS flatbuffers::flatc
+ COMMENT "Generate python interface for circle schema"
+)
+
+list(APPEND PICS_DEPS "${CIRCLE_SCHEMA_PYTHON_DIR}")
+
+# This enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(pics ALL DEPENDS ${PICS_DEPS})
+
+install(DIRECTORY ${CIRCLE_SCHEMA_PYTHON_DIR}
+ FILE_PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION bin)
--- /dev/null
+# pics
+
+_pics_ is the FlatBuffers Python interface for the circle schema.
+
+## How to use pics in your module?
+
+Add the lines below to your module's `CMakeLists.txt`. They create a symbolic link to the `circle` directory under your module's binary directory.
+
+```
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+ COMMAND ${CMAKE_COMMAND} -E create_symlink
+ ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+# Add dependency to ${CMAKE_CURRENT_BINARY_DIR}/circle
+```
unset(QUANTIZATION_CONFIG_VALUE_TEST)
unset(QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM)
-nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
-if(NOT FlatBuffers_FOUND)
- message(STATUS "Build pota-quantization-value-test: FAILED (missing FlatBuffers)")
- return()
-endif(NOT FlatBuffers_FOUND)
-
macro(addTest NAME GRANULARITY DTYPE)
list(APPEND QUANTIZATION_VALUE_TEST ${NAME})
list(APPEND QUANTIZATION_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE})
unset(TEST_DEPS)
get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
-get_target_property(SCHEMA_BIN_PATH mio_circle04 BINARY_DIR)
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py"
"${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY)
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
+# TODO Run both 2.8.0 and 2.10.1 test for jammy
+if(ONE_UBUNTU_CODENAME_JAMMY)
+ set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_10_1")
+else(ONE_UBUNTU_CODENAME_JAMMY)
+ set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
+endif(ONE_UBUNTU_CODENAME_JAMMY)
###
### Generate test.config
COMMENT "Generate test configuration"
)
-###
-### Generate python interface for circle schema
-###
-set(CIRCLE_SCHEMA_PYTHON_DIR "${CMAKE_CURRENT_BINARY_DIR}/circle")
-
-add_custom_command(
- OUTPUT ${CIRCLE_SCHEMA_PYTHON_DIR}
- COMMAND ${CMAKE_COMMAND} -E remove_directory "${CIRCLE_SCHEMA_PYTHON_DIR}"
- COMMAND "$<TARGET_FILE:flatbuffers::flatc>" --python
- -o "${CMAKE_CURRENT_BINARY_DIR}" "${SCHEMA_BIN_PATH}/schema.fbs"
- DEPENDS flatbuffers::flatc
- COMMENT "Generate python interface for circle schema"
-)
+# Import pics module
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+ COMMAND ${CMAKE_COMMAND} -E create_symlink
+ ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
-list(APPEND TEST_DEPS "${TEST_CONFIG}" "${CIRCLE_SCHEMA_PYTHON_DIR}")
+list(APPEND TEST_DEPS "${TEST_CONFIG}" "${CMAKE_CURRENT_BINARY_DIR}/circle")
# This enforces CMake to generate all the dependencies during "build" phase
add_custom_target(pota_quantization_value_test_deps ALL DEPENDS ${TEST_DEPS})
)
add_test(
+ NAME pota_parallel_record_minmax_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_parallel_record_minmax.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
+)
+
+add_test(
NAME pota_quantization_test_with_config
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_quantization_with_config.sh"
"${TEST_CONFIG}"
set_tests_properties(pota_record_minmax_test PROPERTIES DEPENDS pota_fake_wquant_test)
set_tests_properties(pota_quantization_test PROPERTIES DEPENDS pota_record_minmax_test)
+set_tests_properties(pota_parallel_record_minmax_test PROPERTIES DEPENDS pota_record_minmax_test)
set_tests_properties(pota_quantization_test_with_config PROPERTIES DEPENDS pota_fake_wquant_test_with_config)
require("circle-tensordump")
require("common-artifacts")
require("mio-circle04")
+require("pics")
--- /dev/null
+#!/bin/bash
+
+# This script tests the parallel behavior of record-minmax
+#
+# HOW TO USE
+#
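+# ./test_parallel_record_minmax.sh <path/to/test.config> <path/to/work_dir> <MODEL 1> <GRANULARITY 1> <DTYPE 1> <MODEL 2> ...
+# test.config : set ${RECORD_MINMAX_PATH} and ${CIRCLE_TENSORDUMP_PATH}
+#               (${VIRTUALENV} is not assigned by this script, so it must also come from test.config or the environment)
+# work_dir : directory that contains the <MODEL>.circle files; logs and result models are written next to test.config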
+
+SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "${CONFIG_PATH}")
+TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs"
+GEN_SCRIPT_PATH="${BIN_PATH}/gen_h5_explicit_inputs.py"
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found RECORD-MINMAX: ${RECORD_MINMAX_PATH}"
+echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+# Each test case consumes three positional arguments: model name, granularity and dtype.
+while [ "$1" != "" ]; do
+ MODELNAME=$1; shift
+ GRANULARITY=$1; shift
+ DTYPE=$1; shift
+ TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}"
+
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+ TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}"
+
+ # Marker file that records success; removed first so that a marker left by an
+ # earlier run cannot be counted as a pass.
+ PASSED_TAG="${TEST_RESULT_FILE}.parallel_record_minmax.passed"
+ rm -f "${PASSED_TAG}"
+
+ # The steps run in a process substitution; its stdout and stderr (exec 2>&1)
+ # are copied into the per-test log by cat.
+ cat > "${TEST_RESULT_FILE}_parallel_record_minmax.log" <(
+ exec 2>&1
+ # -e: stop this sub-process at the first failing command; -x: trace commands into the log
+ set -ex
+ # Generate h5 input data
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+ --model "${WORKDIR}/${MODELNAME}.circle" \
+ --input "${TEST_INPUT_PATH}/${MODELNAME}/${GRANULARITY}/${DTYPE}" \
+ --output "${TESTCASE_FILE}.input.h5"
+ # NOTE(review): with 'set -e' active a failing generator has already ended this
+ # sub-process, so this branch looks unreachable; 'continue' would also run in the
+ # substituted process, not in the enclosing while loop - confirm and consider removing.
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE INPUT"
+ continue
+ fi
+ # Run parallel record-minmax
+ "${RECORD_MINMAX_PATH}" \
+ --input_model "${TEST_RESULT_FILE}.fake_quantized.circle" \
+ --input_data "${TESTCASE_FILE}.input.h5" \
+ --output_model "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle" \
+ --num_threads 4
+ # Dump min/max values (circle-tensordump)
+ "${CIRCLE_TENSORDUMP_PATH}" \
+ "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle" \
+ --tensors_to_hdf5 "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle.h5"
+ # Compare result
+ "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \
+ --input_h5 "${TEST_RESULT_FILE}.parallel_minmax_recorded.circle.h5" \
+ --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}/${GRANULARITY}/${DTYPE}/record_minmax" \
+ --mode record_minmax
+ # Reached only when every command above succeeded (see 'set -e')
+ if [[ $? -eq 0 ]]; then
+ touch "${PASSED_TAG}"
+ fi
+ )
+
+ # The marker file is the only channel through which the result leaves the sub-process
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$TESTCASE")
+ else
+ FAILED+=("$TESTCASE")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
"${NNCC_OVERLAY_DIR}/venv_2_8_0"
${RECORD_MINMAX_CONVERSION_TEST}
)
+
+if(ONE_UBUNTU_CODENAME_JAMMY)
+ add_test(
+ NAME record_minmax_conversion_210_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+ ${RECORD_MINMAX_CONVERSION_TEST}
+ )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
--- /dev/null
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Build record-minmax-for-thread-test if target arch is 64bit
+# Thread sanitizer is only available on 64bit machine
+# (https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#supported-platforms)
+if(NOT "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
+ return()
+endif(NOT "${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
+
+unset(RECORD_MINMAX_THREAD_SAFETY_TEST)
+
+macro(addTest NAME)
+ list(APPEND RECORD_MINMAX_THREAD_SAFETY_TEST ${NAME})
+endmacro(addTest)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+unset(TEST_DEPS)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+###
+### Generate test.config
+###
+### test.config is sourced by testall.sh and tells it which record-minmax
+### binary to run (the record-minmax-for-thread-test executable).
+###
+### A custom command that referenced MICRO_ARM_BUILD_DEPENDENCY,
+### MICRO_ARM_BUILD_DIR, CMAKE_ARM_OPTIONS and a "standalone" sub-directory was
+### removed here: none of them is defined by this module and nothing depended
+### on its output.
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+set(RECORD_MINMAX_PATH "$<TARGET_FILE:record-minmax-for-thread-test>")
+
+add_custom_command(
+  OUTPUT ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'RECORD_MINMAX_PATH=\"$<TARGET_FILE:record-minmax-for-thread-test>\"' >> ${TEST_CONFIG}
+  DEPENDS record-minmax-for-thread-test
+  COMMENT "Generate test configuration"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}")
+
+# This enforces CMake to generate all the dependencies during "build" phase
+add_custom_target(record_minmax_thread_safety_test_deps ALL DEPENDS ${TEST_DEPS})
+
+# Run tests
+add_test(
+ NAME record_minmax_thread_safety_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_8_0"
+ ${RECORD_MINMAX_THREAD_SAFETY_TEST}
+)
+
+if(ONE_UBUNTU_CODENAME_JAMMY)
+ add_test(
+ NAME record_minmax_thread_safety_210_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ "${NNCC_OVERLAY_DIR}/venv_2_10_1"
+ ${RECORD_MINMAX_THREAD_SAFETY_TEST}
+ )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
--- /dev/null
+#!/usr/bin/env python3
+import h5py as h5
+import numpy as np
+import tensorflow as tf
+import argparse
+
+#
+# This script generates a pack of random input data (.h5) expected by the input tflite model
+#
+# Basic usage:
+# gen_h5_random_inputs.py --model <path/to/tflite/model> --num_data <number/of/data> --output <path/to/output/data>
+# ex: gen_h5_random_inputs.py --model add.tflite --num_data 3 --output add.tflite.input.h5
+# (This will create add.tflite.input.h5, composed of three random inputs, at the path given by --output)
+parser = argparse.ArgumentParser()
+parser.add_argument('--model', type=str, required=True)
+parser.add_argument('--num_data', type=int, required=True)
+parser.add_argument('--output', type=str, required=True)
+args = parser.parse_args()
+
+model = args.model
+
+num_data = args.num_data
+
+output_path = args.output
+
+# Build TFLite interpreter. (to get the information of model input)
+interpreter = tf.lite.Interpreter(model)
+input_details = interpreter.get_input_details()
+
+# Create h5 file
+h5_file = h5.File(output_path, 'w')
+group = h5_file.create_group("value")
+group.attrs['desc'] = "Input data for " + model
+
+# Generate random data
+for i in range(num_data):
+ sample = group.create_group(str(i))
+ sample.attrs['desc'] = "Input data " + str(i)
+
+ for j in range(len(input_details)):
+ input_detail = input_details[j]
+ print(input_detail["dtype"])
+ if input_detail["dtype"] == np.bool_:
+ # Generate random bool [0, 1]
+ input_data = np.array(
+ np.random.random_integers(0, 1, input_detail["shape"]),
+ input_detail["dtype"])
+ elif input_detail["dtype"] == np.float32:
+ # Generate random input [-5, 5)
+ input_data = np.array(10 * np.random.random_sample(input_detail["shape"]) - 5,
+ input_detail["dtype"])
+ sample.create_dataset(str(j), data=input_data)
+
+h5_file.close()
--- /dev/null
+require("common-artifacts")
+require("record-minmax")
--- /dev/null
+addTest(Add_000)
+addTest(AveragePool2D_000)
+addTest(Concatenation_000)
+addTest(Conv2D_000)
+addTest(Conv2D_001)
+addTest(Conv2D_002)
+addTest(DepthwiseConv2D_000)
+addTest(FullyConnected_000)
+addTest(FullyConnected_001)
+addTest(MaxPool2D_000)
+addTest(Mul_000)
+addTest(Pad_000)
+addTest(Reshape_000)
+addTest(Reshape_001)
+addTest(Reshape_002)
+addTest(Softmax_000)
--- /dev/null
+#!/bin/bash
+
+# This script tests the parallel behavior of record-minmax
+#
+# HOW TO USE
+#
+# ./testall.sh <path/to/test.config> <path/to/work_dir> <path/to/virtualenv> <TEST 1> <TEST 2> ...
+# test.config : set ${RECORD_MINMAX_PATH}
+# work_dir : directory that contains <TEST>.tflite and <TEST>.circle; logs and outputs are written next to test.config
+# virtualenv : Python virtual environment used to run gen_h5_random_inputs.py
+
+GEN_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+GEN_SCRIPT_PATH="${GEN_SOURCE_PATH}/gen_h5_random_inputs.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "$CONFIG_PATH")
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found RECORD-MINMAX: ${RECORD_MINMAX_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+# Run every test case named on the command line
+for TESTCASE in "$@"; do
+ TESTED+=("${TESTCASE}")
+
+ TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+
+ # Marker file that records success; removed first so that a marker left by an
+ # earlier run cannot be counted as a pass.
+ PASSED_TAG="${BIN_PATH}/${TESTCASE}.passed"
+ rm -f "${PASSED_TAG}"
+
+ # The steps run in a process substitution; its stdout and stderr (exec 2>&1)
+ # are copied into the per-test log by cat.
+ cat > "${BIN_PATH}/${TESTCASE}.log" <(
+ exec 2>&1
+ # -e: stop this sub-process at the first failing command; -x: trace commands into the log
+ set -ex
+ # Generate h5 input data
+ source "${VIRTUALENV}/bin/activate"
+ "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+ --model "${TESTCASE_FILE}.tflite" \
+ --num_data 8 \
+ --output "${BIN_PATH}/${TESTCASE}.tflite.input.h5"
+ # NOTE(review): with 'set -e' active a failing command has already ended this
+ # sub-process, so this branch looks unreachable; 'continue' would also run in the
+ # substituted process, not in the enclosing for loop - confirm and consider removing.
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE INPUT"
+ continue
+ fi
+ # Run record-minmax in parallel mode
+ "${RECORD_MINMAX_PATH}" \
+ --input_model "${TESTCASE_FILE}.circle" \
+ --input_data "${BIN_PATH}/${TESTCASE}.tflite.input.h5" \
+ --output_model "${BIN_PATH}/${TESTCASE}.out.circle" \
+ --num_threads 4
+ if [[ $? -ne 0 ]]; then
+ echo "FAILED TO GENERATE CIRCLE OUTPUT"
+ continue
+ fi
+ )
+
+ # The only pass criterion is that the log does not contain a data-race report.
+ # NOTE(review): a run that stopped early (e.g. input generation failed) also has
+ # no such report and is therefore counted as passed - confirm this is intended.
+ if ! grep -q "ThreadSanitizer: data race" "${BIN_PATH}/${TESTCASE}.log"; then
+ touch "${PASSED_TAG}"
+ fi
+
+ if [[ -f "${PASSED_TAG}" ]]; then
+ PASSED+=("$TESTCASE")
+ else
+ FAILED+=("$TESTCASE")
+ fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+ echo "FAILED"
+ for TEST in "${FAILED[@]}"
+ do
+ echo "- ${TEST}"
+ done
+ exit 255
+fi
+
+echo "PASSED"
+exit 0
target_link_libraries(record-minmax luci_env)
target_link_libraries(record-minmax luci_export)
target_link_libraries(record-minmax luci_interpreter)
+target_link_libraries(record-minmax luci_log)
target_link_libraries(record-minmax dio_hdf5)
target_link_libraries(record-minmax vconone)
target_link_libraries(record-minmax nncc_coverage)
+target_link_libraries(record-minmax nncc_common)
install(TARGETS record-minmax DESTINATION bin)
return()
endif(NOT ENABLE_TEST)
+# Build record-minmax-for-thread-test if target arch is 64bit
+# Thread sanitizer is only available on 64bit machine
+# (https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual#supported-platforms)
+if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
+ # create record-minmax-for-thread-test target
+ # Note: record-minmax-for-thread-test is built with -fsanitize=thread so that thread sanitizer can check memory bugs,
+ # record-minmax is built without the option for performance.
+ add_executable(record-minmax-for-thread-test ${DRIVER} ${SOURCES})
+ target_include_directories(record-minmax-for-thread-test PRIVATE include)
+
+ target_link_libraries(record-minmax-for-thread-test arser)
+ target_link_libraries(record-minmax-for-thread-test safemain)
+ target_link_libraries(record-minmax-for-thread-test luci_import)
+ target_link_libraries(record-minmax-for-thread-test luci_env)
+ target_link_libraries(record-minmax-for-thread-test luci_export)
+ target_link_libraries(record-minmax-for-thread-test luci_interpreter)
+ target_link_libraries(record-minmax-for-thread-test dio_hdf5)
+ target_link_libraries(record-minmax-for-thread-test vconone)
+ target_link_libraries(record-minmax-for-thread-test nncc_coverage)
+ target_link_libraries(record-minmax-for-thread-test luci_log)
+
+ target_compile_options(record-minmax-for-thread-test PUBLIC -fsanitize=thread)
+ target_link_libraries(record-minmax-for-thread-test -fsanitize=thread)
+endif("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
+
file(GLOB_RECURSE TESTS "tests/*.test.cpp")
nnas_find_package(GTest REQUIRED)
#include <luci/UserSettings.h>
+// TODO declare own log signature of record-minmax
+#include <luci/Log.h>
+
void print_version(void)
{
std::cout << "record-minmax version " << vconone::get_string() << std::endl;
{
using namespace record_minmax;
+ LOGGER(l);
+
arser::Arser arser(
"Embedding min/max values of activations to the circle model for post-training quantization");
.type(arser::DataType::FLOAT)
.help("Record n'th percentile of min");
+ arser.add_argument("--num_threads")
+ .type(arser::DataType::INT32)
+ .help("Number of threads (default: 1)");
+
arser.add_argument("--max_percentile")
.type(arser::DataType::FLOAT)
.help("Record n'th percentile of max");
float min_percentile = 1.0;
float max_percentile = 99.0;
std::string input_data_format("h5");
+ uint32_t num_threads = 1;
if (arser["--min_percentile"])
min_percentile = arser.get<float>("--min_percentile");
+  // Parse --num_threads (optional; num_threads keeps its default otherwise).
+  if (arser["--num_threads"])
+  {
+    // Validate while the value is still signed. Assigning a negative int
+    // straight into the unsigned num_threads would wrap around to a huge
+    // positive count and slip past a "< 1" check.
+    const int requested_threads = arser.get<int>("--num_threads");
+    if (requested_threads < 1)
+      throw std::runtime_error("The number of threads must be greater than zero");
+
+    num_threads = static_cast<uint32_t>(requested_threads);
+  }
+
if (arser["--max_percentile"])
max_percentile = arser.get<float>("--max_percentile");
if (arser["--input_data_format"])
input_data_format = arser.get<std::string>("--input_data_format");
- RecordMinMax rmm;
+ RecordMinMax rmm(num_threads);
+
+ // TODO: support parallel record for profile with random data
+ if (num_threads > 1 and not arser["--input_data"])
+ {
+ throw std::runtime_error("Input data must be given for parallel recording");
+ }
// Initialize interpreter and observer
rmm.initialize(input_model_path);
{
auto input_data_path = arser.get<std::string>("--input_data");
+ // TODO: support parallel record from file and dir input data format
+ if (num_threads > 1 and not(input_data_format == "h5") and not(input_data_format == "hdf5"))
+ {
+ throw std::runtime_error("Parallel recording is used only for h5 now");
+ }
+
if (input_data_format == "h5" || input_data_format == "hdf5")
{
// Profile min/max while executing the H5 data
- rmm.profileData(mode, input_data_path, min_percentile, max_percentile);
+ if (num_threads == 1)
+ rmm.profileData(mode, input_data_path, min_percentile, max_percentile);
+ else
+ {
+ INFO(l) << "Using parallel recording" << std::endl;
+ rmm.profileDataInParallel(mode, input_data_path, min_percentile, max_percentile);
+ }
}
// input_data is a text file having a file path in each line.
// Each data file is composed of inputs of a model, concatenated in
vectors.max_vector.push_back(max);
}
+  // Append all min/max values in 'minmax_vector' to the values already
+  // recorded for 'node', keeping the existing values in front.
+  void appendMinMaxVector(const luci::CircleNode *node, const MinMaxVectors &minmax_vector)
+  {
+    // Push every element of 'src' onto the tail of 'dst'
+    const auto extend = [](auto &dst, const auto &src) {
+      dst.insert(dst.end(), src.begin(), src.end());
+    };
+
+    // operator[] creates an empty entry when 'node' has no record yet
+    auto &recorded = _minmax_map[node];
+    extend(recorded.min_vector, minmax_vector.min_vector);
+    extend(recorded.max_vector, minmax_vector.max_vector);
+  }
+
const std::unordered_map<const luci::CircleNode *, MinMaxVectors> *getMap() const
{
return &_minmax_map;
assert(alpha >= 0.0 && alpha <= 1.0);
assert(batch_size > 0);
- auto getBatchMinOrMax = [&](int start_index) {
- assert(start_index >= 0 && start_index < vector.size());
+ auto getBatchMinOrMax = [&](uint32_t start_index) {
+ assert(start_index < vector.size());
float res = is_min ? std::numeric_limits<float>::max() : std::numeric_limits<float>::lowest();
- for (int offset = 0; offset < batch_size; offset++)
+ for (uint32_t offset = 0; offset < batch_size; offset++)
{
- int index = start_index + offset;
+ uint32_t index = start_index + offset;
if (index >= vector.size())
break;
};
float curr_avg = getBatchMinOrMax(0);
- for (size_t i = batch_size; i < vector.size(); i += batch_size)
+ for (uint32_t i = batch_size; i < vector.size(); i += batch_size)
{
curr_avg = curr_avg * alpha + getBatchMinOrMax(i) * (1.0 - alpha);
}
#include "MinMaxObserver.h"
#include <memory>
+#include <thread>
namespace record_minmax
{
+using Buffer = std::vector<char>;
+using Output = std::vector<Buffer>;
+using WholeOutput = std::vector<Output>;
+
class RecordMinMax
{
public:
- explicit RecordMinMax() = default;
+ explicit RecordMinMax(uint32_t num_threads) : _threads_size(num_threads)
+ {
+ assert(_threads_size > 0);
+ }
~RecordMinMax() = default;
void profileData(const std::string &mode, const std::string &input_data_path,
float min_percentile, float max_percentile);
+ void profileDataInParallel(const std::string &mode, const std::string &input_data_path,
+ float min_percentile, float max_percentile);
+
void profileRawData(const std::string &mode, const std::string &input_data_path,
float min_percentile, float max_percentile);
void saveModel(const std::string &output_model_path);
private:
+ luci_interpreter::Interpreter *getInterpreter() const { return _interpreters[0].get(); }
+ MinMaxObserver *getObserver() const { return _observers[0].get(); }
+
+ WholeOutput importH5Data(const std::string &input_data_path);
+
std::unique_ptr<luci::Module> _module;
- std::unique_ptr<luci_interpreter::Interpreter> _interpreter;
- std::unique_ptr<MinMaxObserver> _observer;
+
+ // Multiple interpreters are used for parallel execution
+ std::vector<std::unique_ptr<luci_interpreter::Interpreter>> _interpreters;
+ std::vector<std::unique_ptr<MinMaxObserver>> _observers;
+
+ uint32_t _threads_size = 0;
};
} // namespace record_minmax
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
#include <luci/IR/CircleQuantParam.h>
+#include <luci/Log.h>
#include <dio_hdf5/HDF5Importer.h>
#include <dirent.h>
namespace
{
+// Max h5 file size for parallel recording in bytes = 1 GB
+const long h5_max_size_bytes = 1000000000;
+
+// Return the read position after seeking to the end of the file at
+// 'input_data_path', i.e. the file size in bytes.
+long getH5FileSize(const std::string &input_data_path)
+{
+  std::ifstream h5_stream(input_data_path, std::ios::binary);
+
+  // Jump to the last byte; the resulting offset is what gets reported
+  h5_stream.seekg(0, std::ios::end);
+  const auto end_position = h5_stream.tellg();
+
+  return end_position;
+}
+
uint32_t numElements(const luci::CircleNode *node)
{
uint32_t num_elements = 1;
throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
if (fs.read(data.data(), data_size).fail())
throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+ if (fs.peek() != EOF)
+ throw std::runtime_error("Input tensor size mismatches with \"" + filename + "\".\n");
}
std::vector<uint8_t> genRandomBoolData(std::mt19937 &gen, uint32_t num_elements)
void RecordMinMax::initialize(const std::string &input_model_path)
{
+ assert(_threads_size > 0);
+
// Load model from the file
std::ifstream fs(input_model_path, std::ifstream::binary);
if (fs.fail())
throw std::runtime_error("Failed to load '" + input_model_path + "'");
}
- // Initialize interpreter
- _interpreter = std::make_unique<luci_interpreter::Interpreter>(_module.get());
+ // Create and initialize interpreters and observers
+ _interpreters.resize(_threads_size);
+ _observers.resize(_threads_size);
- _observer = std::make_unique<MinMaxObserver>();
+ for (uint32_t thread_idx = 0; thread_idx < _threads_size; ++thread_idx)
+ {
+ auto interpreter = std::make_unique<luci_interpreter::Interpreter>(_module.get());
+ auto observer = std::make_unique<MinMaxObserver>();
- _interpreter->attachObserver(_observer.get());
+ interpreter->attachObserver(observer.get());
+
+ _observers[thread_idx] = std::move(observer);
+ _interpreters[thread_idx] = std::move(interpreter);
+ }
}
// input_data_path is a path to the directory
total_input_size += getTensorSize(input_node);
}
- while (entry = readdir(dp))
+ while ((entry = readdir(dp)))
{
// Skip if the entry is not a regular file
if (entry->d_type != DT_REG)
{
const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
const auto input_size = getTensorSize(input_node);
- _interpreter->writeInputTensor(input_node, input_data.data() + offset, input_size);
+ getInterpreter()->writeInputTensor(input_node, input_data.data() + offset, input_size);
offset += input_size;
}
- _interpreter->interpret();
+ getInterpreter()->interpret();
num_records++;
}
std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
- update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
+ update_quantparam(getObserver(), mode, min_percentile, max_percentile);
}
// input_data_path is a text file which specifies the representative data
{
const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
const auto input_size = getTensorSize(input_node);
- _interpreter->writeInputTensor(input_node, input_data.data() + offset, input_size);
+ getInterpreter()->writeInputTensor(input_node, input_data.data() + offset, input_size);
offset += input_size;
}
- _interpreter->interpret();
+ getInterpreter()->interpret();
num_records++;
}
std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
- update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
+ update_quantparam(getObserver(), mode, min_percentile, max_percentile);
+}
+
+// Load every record of the HDF5 file at 'input_data_path' into memory.
+//
+// Returns a WholeOutput indexed as [record][input], where each element is the
+// byte buffer of one model input.
+//
+// Throws std::runtime_error when the file has no record, when a record has a
+// different number of inputs than the model graph, or when the HDF5 library
+// raises an H5::Exception.
+WholeOutput RecordMinMax::importH5Data(const std::string &input_data_path)
+{
+ try
+ {
+ dio::hdf5::HDF5Importer importer(input_data_path);
+ importer.importGroup("value");
+
+ // Decides below whether type/shape verification is performed
+ bool is_raw_data = importer.isRawData();
+
+ const auto num_records = importer.numData();
+ if (num_records == 0)
+ throw std::runtime_error("The input data file does not contain any record.");
+
+ const auto input_nodes = loco::input_nodes(_module->graph());
+ const auto num_inputs = input_nodes.size();
+
+ // One (initially empty) Output per record; inputs are appended in index order
+ WholeOutput whole_output(num_records);
+
+ // Read inputs to whole_output
+ for (int i = 0; i < num_records; ++i)
+ {
+ if (num_inputs != static_cast<uint32_t>(importer.numInputs(i)))
+ throw std::runtime_error("Wrong number of inputs.");
+
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+ assert(input_node->index() == input_idx);
+ checkInputDimension(input_node);
+ // Buffer sized with getTensorSize() for this input node
+ Buffer input_data(getTensorSize(input_node));
+
+ if (!is_raw_data)
+ {
+ DataType dtype;
+ Shape shape;
+ importer.readTensor(i, input_idx, &dtype, &shape, input_data.data());
+
+ // Check the type and the shape of the input data is valid
+ verifyTypeShape(input_node, dtype, shape);
+ }
+ else
+ {
+ // Skip type/shape check for raw data
+ importer.readTensor(i, input_idx, input_data.data());
+ }
+ // Move (not copy) the buffer into the result
+ whole_output[i].emplace_back(std::move(input_data));
+ }
+ }
+
+ return whole_output;
+ }
+ catch (const H5::Exception &e)
+ {
+ // Print the HDF5 error stack, then report a generic error to the caller
+ H5::Exception::printErrorStack();
+ throw std::runtime_error("HDF5 error occurred.");
+ }
}
void RecordMinMax::profileData(const std::string &mode, const std::string &input_data_path,
for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
{
- if (num_inputs != importer.numInputs(record_idx))
+ if (num_inputs != static_cast<uint32_t>(importer.numInputs(record_idx)))
throw std::runtime_error("Wrong number of inputs.");
std::cout << "Recording " << record_idx << "'th data" << std::endl;
- for (int32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
{
const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
assert(input_node->index() == input_idx);
// TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
// We can redcue the copy by directly writing data from file to interpreter inputs
- _interpreter->writeInputTensor(input_node, input_data.data(), input_data.size());
+ getInterpreter()->writeInputTensor(input_node, input_data.data(), input_data.size());
}
- _interpreter->interpret();
+ getInterpreter()->interpret();
}
std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
throw std::runtime_error("HDF5 error occurred.");
}
- update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
+ update_quantparam(getObserver(), mode, min_percentile, max_percentile);
+}
+
+// Profile min/max values from an HDF5 file using several interpreters at once.
+//
+// The whole file is loaded into memory first, the records are split into
+// contiguous ranges, and each range is interpreted on its own thread with its
+// own interpreter. The values recorded by all observers are then merged into
+// one observer, which is passed to update_quantparam().
+//
+// Throws std::runtime_error when the file is larger than h5_max_size_bytes or
+// when loading runs out of memory; errors from importH5Data() propagate.
+void RecordMinMax::profileDataInParallel(const std::string &mode,
+ const std::string &input_data_path, float min_percentile,
+ float max_percentile)
+{
+ LOGGER(l);
+
+ assert(_interpreters.size() == _threads_size);
+ assert(_observers.size() == _threads_size);
+
+ const long h5_file_size = getH5FileSize(input_data_path);
+
+ // All records are kept in memory at once, so the file size is capped
+ if (h5_file_size > h5_max_size_bytes)
+ throw std::runtime_error("H5 file size is too large for parallel recording");
+
+ WholeOutput whole_output;
+ try
+ {
+ whole_output = importH5Data(input_data_path);
+ }
+ catch (const std::bad_alloc &e)
+ {
+ throw std::runtime_error("Out of memory during h5 data load.");
+ }
+
+ const auto num_records = whole_output.size();
+ const auto input_nodes = loco::input_nodes(_module->graph());
+
+ // Start parallel part
+ INFO(l) << _threads_size << " concurrent threads are supported." << std::endl;
+
+ // Never start more threads than there are records
+ const auto run_threads = num_records < _threads_size ? num_records : _threads_size;
+
+ // Records per thread (integer division); the last thread also takes the remainder
+ const auto records_batch = static_cast<uint32_t>(num_records / run_threads);
+
+ // Feed records [first_record, last_record) to 'interpreter' and run it on each
+ // NOTE(review): nothing here catches exceptions; an exception escaping a
+ // std::thread function terminates the process - confirm this is acceptable
+ auto interpret_batch = [&whole_output, &input_nodes](int first_record, int last_record,
+ luci_interpreter::Interpreter *interpreter) {
+ for (int record_index = first_record; record_index < last_record; ++record_index)
+ {
+ for (uint32_t input_idx = 0; input_idx < input_nodes.size(); input_idx++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+
+ const auto &cur_input_data = whole_output[record_index][input_idx];
+ interpreter->writeInputTensor(input_node, cur_input_data.data(), cur_input_data.size());
+ }
+ interpreter->interpret();
+ }
+ };
+
+ // Thread t uses interpreter t, so no interpreter is shared between threads
+ std::vector<std::thread> threads;
+ for (uint32_t t = 0; t < run_threads; ++t)
+ {
+ if (t < run_threads - 1)
+ {
+ threads.emplace_back(interpret_batch, records_batch * t, records_batch * (t + 1),
+ _interpreters[t].get());
+ }
+ else
+ {
+ // Last thread: run up to num_records to cover the division remainder
+ threads.emplace_back(interpret_batch, records_batch * t, num_records, _interpreters[t].get());
+ }
+ }
+
+ // Wait for every worker before reading the observers
+ for (uint32_t i = 0; i < run_threads; ++i)
+ threads.at(i).join();
+
+ // End parallel part
+
+ // Copy all min, max values to one observer
+ auto observer = std::make_unique<MinMaxObserver>();
+ // const_cast is used to get a writable map out of minMaxData()
+ auto main_min_max_map = const_cast<MinMaxMap *>(observer->minMaxData());
+
+ // Observers are visited in index order, so values are appended thread by thread
+ for (const auto &obs : _observers)
+ {
+ const auto cur_minmax_map = obs->minMaxData()->getMap();
+ for (auto &iter : *cur_minmax_map)
+ {
+ const auto node = iter.first;
+ const auto &minmax = iter.second;
+
+ main_min_max_map->appendMinMaxVector(node, minmax);
+ }
+ }
+
+ std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
+
+ update_quantparam(observer.get(), mode, min_percentile, max_percentile);
}
void RecordMinMax::profileDataWithRandomInputs(const std::string &mode, float min_percentile,
std::mt19937 gen(rd());
std::uniform_real_distribution<> dist(-5, 5);
- for (int32_t record_idx = 0; record_idx < num_records; record_idx++)
+ for (uint32_t record_idx = 0; record_idx < num_records; record_idx++)
{
std::cout << "Recording " << record_idx << "'th data" << std::endl;
- for (int32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+ for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
{
const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
assert(input_node->index() == input_idx);
// TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
// We can redcue the copy by directly writing data from file to interpreter inputs
- _interpreter->writeInputTensor(input_node, input_data.data(),
- input_data.size() * sizeof(float));
+ getInterpreter()->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(float));
}
else if (input_node->dtype() == DataType::BOOL)
{
auto input_data = genRandomBoolData(gen, num_elements);
- _interpreter->writeInputTensor(input_node, input_data.data(),
- input_data.size() * sizeof(uint8_t));
+ getInterpreter()->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(uint8_t));
}
else if (input_node->dtype() == DataType::S32)
{
auto input_data = genRandomIntData<int32_t>(gen, num_elements, 0, 100);
- _interpreter->writeInputTensor(input_node, input_data.data(),
- input_data.size() * sizeof(int32_t));
+ getInterpreter()->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int32_t));
}
else if (input_node->dtype() == DataType::S64)
{
auto input_data = genRandomIntData<int64_t>(gen, num_elements, 0, 100);
- _interpreter->writeInputTensor(input_node, input_data.data(),
- input_data.size() * sizeof(int64_t));
+ getInterpreter()->writeInputTensor(input_node, input_data.data(),
+ input_data.size() * sizeof(int64_t));
}
}
- _interpreter->interpret();
+ getInterpreter()->interpret();
}
std::cout << "Recording finished. Number of recorded data: " << num_records << std::endl;
- update_quantparam(_observer.get(), mode, min_percentile, max_percentile);
+ update_quantparam(getObserver(), mode, min_percentile, max_percentile);
}
void RecordMinMax::saveModel(const std::string &output_model_path)
#include <cassert>
#include <stdexcept>
+#include <limits> // std::numeric_limits
#include <fp16.h>
COMMAND ${CMAKE_COMMAND} -E echo 'RANDOMIZE_ACTION_PATH=\"$<TARGET_FILE:nnkit_randomize_action>\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_EXPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_export_action>\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_IMPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_import_action>\"' >> ${TEST_CONFIG}
- COMMAND ${CMAKE_COMMAND} -E echo 'MODEL2NNPKG_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh\"' >> ${TEST_CONFIG}
+ COMMAND ${CMAKE_COMMAND} -E echo 'MODEL2NNPKG_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.py\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'RUNTIME_LIBRARY_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/Product/out/\"' >> ${TEST_CONFIG}
DEPENDS
nnkit-run
--post-arg "${WORKDIR}/${PREFIX}.expected.h5"
# Generate nnpackage model
- "${MODEL2NNPKG_PATH}" -o "${WORKDIR}" "${WORKDIR}/${PREFIX}.circle"
+ "${MODEL2NNPKG_PATH}" -o "${WORKDIR}" -m "${WORKDIR}/${PREFIX}.circle"
# Copy h5 files into nnpackage
mkdir -p "${WORKDIR}/${PREFIX}/metadata/tc"
#include <fcntl.h>
+#include <limits> // std::numeric_limits
+
namespace
{
bool load_text(const cwrap::Fildes &fildes, tf2circle::CustomOpInfoDef &def)
#include <fcntl.h>
+#include <limits> // std::numeric_limits
+
namespace
{
bool load_text(const cwrap::Fildes &fildes, tf2tflite::CustomOpInfoDef &def)
get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
+# TODO Run both 2.8.0 and 2.10.1 test for jammy
+if(ONE_UBUNTU_CODENAME_JAMMY)
+ set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_10_1")
+else()
+ set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
+endif()
###
### Generate test.config
int8_t get_circle_builtin_code(int8_t tfl_bop_i8)
{
- tflite::BuiltinOperator tfl_bop = static_cast<tflite::BuiltinOperator>(tfl_bop_i8);
+ return get_circle_builtin_code(static_cast<int32_t>(tfl_bop_i8));
+}
+
+int32_t get_circle_builtin_code(int32_t tfl_bop_i32)
+{
+ tflite::BuiltinOperator tfl_bop = static_cast<tflite::BuiltinOperator>(tfl_bop_i32);
switch (tfl_bop)
{
#define TFL_OPERATOR(OP) \
case tflite::BuiltinOperator_##OP: \
- return static_cast<int8_t>(circle::BuiltinOperator_##OP);
+ return static_cast<int32_t>(circle::BuiltinOperator_##OP);
#include "TFLOperator.lst"
#undef TFL_OPERATOR
+ case tflite::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES:
+ return static_cast<int32_t>(circle::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES);
default:
throw std::runtime_error("tflite2circle: wrong op");
}
circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop);
int8_t get_circle_builtin_code(int8_t tfl_bop_i8);
+int32_t get_circle_builtin_code(int32_t tfl_bop_i32);
/**
* @brief Returns circle TensorType according to tflite.
TFL_OPERATOR(DENSIFY)
TFL_OPERATOR(SEGMENT_SUM)
TFL_OPERATOR(BATCH_MATMUL)
+// PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+TFL_OPERATOR(CUMSUM)
+TFL_OPERATOR(CALL_ONCE)
+TFL_OPERATOR(BROADCAST_TO)
+TFL_OPERATOR(RFFT2D)
+TFL_OPERATOR(CONV_3D)
+TFL_OPERATOR(IMAG)
+TFL_OPERATOR(REAL)
+TFL_OPERATOR(COMPLEX_ABS)
+TFL_OPERATOR(HASHTABLE)
+TFL_OPERATOR(HASHTABLE_FIND)
+TFL_OPERATOR(HASHTABLE_IMPORT)
+TFL_OPERATOR(HASHTABLE_SIZE)
+TFL_OPERATOR(REDUCE_ALL)
+TFL_OPERATOR(CONV_3D_TRANSPOSE)
+TFL_OPERATOR(VAR_HANDLE)
+TFL_OPERATOR(READ_VARIABLE)
+TFL_OPERATOR(ASSIGN_VARIABLE)
+TFL_OPERATOR(BROADCAST_ARGS)
+TFL_OPERATOR(RANDOM_STANDARD_NORMAL)
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x0000000000150001)
+ set(VCONONE_VERSION 0x0000000000160001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
--- /dev/null
+# visq-unittest: stages the Python unit tests of visq in the build directory
+# and registers them with CTest. Nothing is done unless tests are enabled.
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+# Files the visq_unittest target has to produce before the tests can run
+unset(VISQ_TEST_DEPS)
+
+###
+### Copy test files
+###
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/test
+  COMMAND ${CMAKE_COMMAND} -E copy_directory
+    ${CMAKE_CURRENT_SOURCE_DIR}/test ${CMAKE_CURRENT_BINARY_DIR}/test)
+
+list(APPEND VISQ_TEST_DEPS ${CMAKE_CURRENT_BINARY_DIR}/test)
+
+###
+### Import visqlib module
+###
+get_target_property(VISQ_BIN_PATH visq BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/visqlib
+  COMMAND ${CMAKE_COMMAND} -E create_symlink
+    ${VISQ_BIN_PATH}/visqlib ${CMAKE_CURRENT_BINARY_DIR}/visqlib)
+
+list(APPEND VISQ_TEST_DEPS ${CMAKE_CURRENT_BINARY_DIR}/visqlib)
+
+###
+### Import pics module
+###
+get_target_property(PICS_BIN_PATH pics BINARY_DIR)
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/circle
+  COMMAND ${CMAKE_COMMAND} -E create_symlink
+    ${PICS_BIN_PATH}/circle ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+list(APPEND VISQ_TEST_DEPS ${CMAKE_CURRENT_BINARY_DIR}/circle)
+
+###
+### Generate Resources.py
+###
+set(RESOURCE_FILE "${CMAKE_CURRENT_BINARY_DIR}/test/Resources.py")
+
+get_target_property(FP32_MODEL_DIR testDataGenerator BINARY_DIR)
+
+# Resources.py is written inside the copied test directory, so the copy has to
+# finish first (otherwise a parallel build may run this before the directory
+# exists). The file is overwritten ('>') rather than appended to, so running
+# the rule again does not add a duplicate line.
+add_custom_command(
+  OUTPUT ${RESOURCE_FILE}
+  COMMAND ${CMAKE_COMMAND} -E echo 'fp32_model_dir=\"${FP32_MODEL_DIR}\"' > ${RESOURCE_FILE}
+  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/test
+  COMMENT "Generate file to specify resource location"
+)
+
+list(APPEND VISQ_TEST_DEPS ${RESOURCE_FILE})
+
+add_custom_target(visq_unittest ALL DEPENDS ${VISQ_TEST_DEPS})
+
+# Use Python in venv to run unittest with pydot module
+add_test(
+  NAME visq_unittest
+  COMMAND ${NNCC_OVERLAY_DIR}/venv_2_8_0/bin/python -m unittest
+  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+)
+
+# On jammy the same tests are run once more with the 2.10.1 virtual environment
+if(ONE_UBUNTU_CODENAME_JAMMY)
+  add_test(
+    NAME visq_210_unittest
+    COMMAND ${NNCC_OVERLAY_DIR}/venv_2_10_1/bin/python -m unittest
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  )
+endif(ONE_UBUNTU_CODENAME_JAMMY)
--- /dev/null
+# visq-unittest
+
+_visq-unittest_ is a module to test visq
--- /dev/null
+require("pics")
+require("common-artifacts")
+require("visq")
--- /dev/null
+# DO NOT REMOVE THIS FILE
--- /dev/null
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Test visqlib.DotBuilder module"""
+
+import unittest
+import pydot
+from pathlib import Path
+
+from visqlib.DotBuilder import DotBuilder
+from test.Resources import fp32_model_dir
+
+
+class VisqDotBuilderTest(unittest.TestCase):
+ def test_dot_builder_wrong_input_file(self):
+ self.assertRaises(FileNotFoundError, DotBuilder, "wrong", "wrong", "wrong",
+ "wrong")
+
+ def test_dot_builder(self):
+ test_colors = [{"b": 0, "e": 0.5, "c": "green"}, {"b": 0.5, "e": 1, "c": "red"}]
+ test_qerror_map = dict()
+ test_qerror_map["ofm"] = 0.1
+ builder = DotBuilder(fp32_model_dir + "/Add_000.circle", "Add_000.dot", "MPEIR",
+ test_colors)
+ builder.save(test_qerror_map)
+
+ graph = pydot.graph_from_dot_file("Add_000.dot")[0]
+ # Why 1? 0 is output
+ ofm_node = graph.get_node("\"ofm\"")[1]
+ self.assertEqual("green", ofm_node.get_fillcolor())
+
+
+if __name__ == "__main__":
+ unittest.main()
--- /dev/null
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Test visqlib.Palette module'''
+
+import unittest
+
+from visqlib.Palette import YLORRD9Palette
+
+
+class VisqPaletteTest(unittest.TestCase):
+ def test_ylorrd9(self):
+ min_test = [0.0, 0, -100, -100]
+ max_test = [1.0, 500, 100, -10]
+
+ for min_val, max_val in zip(min_test, max_test):
+ palette = YLORRD9Palette(qerror_min=min_val, qerror_max=max_val)
+ cs = palette.colorscheme()
+ self.assertEqual(9, len(cs))
+
+ def test_ylorrd9_wrong_minmax(self):
+ min_test = [0.0, 10]
+ max_test = [0.0, 0]
+
+ for min_val, max_val in zip(min_test, max_test):
+ # min must be less than max
+ self.assertRaises(
+ RuntimeError, YLORRD9Palette, qerror_min=min_val, qerror_max=max_val)
+
+
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Test visqlib.QErrorComputer module'''
+
+import unittest
+import tempfile
+import numpy as np
+import os
+
+from visqlib.QErrorComputer import MPEIRComputer
+from visqlib.QErrorComputer import MSEComputer
+from visqlib.QErrorComputer import TAEComputer
+
+
+class VisqQErrorComputerTest(unittest.TestCase):
+ def setUp(self):
+ "Called before running each test"
+ self.fp32_dir = tempfile.TemporaryDirectory()
+ self.fq_dir = tempfile.TemporaryDirectory()
+
+ def tearDown(self):
+ "Called after running each test"
+ self.fp32_dir.cleanup()
+ self.fq_dir.cleanup()
+
+ def _setUpSingleTensorData(self):
+ with open(self.fp32_dir.name + '/tensors.txt', 'w') as f:
+ f.write('test')
+ with open(self.fq_dir.name + '/tensors.txt', 'w') as f:
+ f.write('test')
+ os.mkdir(self.fp32_dir.name + '/0')
+ os.mkdir(self.fq_dir.name + '/0')
+ test_data = np.zeros(16)
+ np.save(self.fp32_dir.name + '/0/test.npy', test_data)
+ np.save(self.fq_dir.name + '/0/test.npy', test_data)
+
+ def _setUpTwoTensorData(self):
+ with open(self.fp32_dir.name + '/tensors.txt', 'w') as f:
+ f.write('test')
+ with open(self.fq_dir.name + '/tensors.txt', 'w') as f:
+ f.write('test')
+ os.mkdir(self.fp32_dir.name + '/0')
+ os.mkdir(self.fp32_dir.name + '/1')
+ os.mkdir(self.fq_dir.name + '/0')
+ os.mkdir(self.fq_dir.name + '/1')
+ test_data_one = np.ones(16)
+ test_data_zero = np.zeros(16)
+ np.save(self.fp32_dir.name + '/0/test.npy', test_data_one)
+ np.save(self.fp32_dir.name + '/1/test.npy', test_data_zero)
+ np.save(self.fq_dir.name + '/0/test.npy', test_data_zero)
+ np.save(self.fq_dir.name + '/1/test.npy', test_data_zero)
+ # Golden: (1 + 0) / 2 = 0.5 for MSE
+
+ def _setUpDifferentTensorData(self):
+ # Two fp32 data (test, test2)
+ # One fq data (test)
+ # NOTE When does this happen?
+ # This case can happen because visq ignores nodes that do not affect qerrors.
+ # For example, RESHAPE Op does not affect qerrors, so its fq data is not dumped,
+ # although it is listed in 'tensors.txt'.
+ with open(self.fp32_dir.name + '/tensors.txt', 'w') as f:
+ f.writelines(['test\n', 'test2'])
+ with open(self.fq_dir.name + '/tensors.txt', 'w') as f:
+ f.writelines(['test\n', 'test2'])
+ os.mkdir(self.fp32_dir.name + '/0')
+ os.mkdir(self.fq_dir.name + '/0')
+ test_data = np.zeros(16)
+ np.save(self.fp32_dir.name + '/0/test.npy', test_data)
+ np.save(self.fp32_dir.name + '/0/test2.npy', test_data)
+ np.save(self.fq_dir.name + '/0/test.npy', test_data)
+
+ def test_MPEIR(self):
+ self._setUpSingleTensorData()
+
+ computer = MPEIRComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+
+ def test_MPEIR_different_tensors(self):
+ self._setUpDifferentTensorData()
+
+ computer = MPEIRComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+
+ def test_MSE(self):
+ self._setUpSingleTensorData()
+
+ computer = MSEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(0.0, qmax)
+
+ def test_MSE_two(self):
+ self._setUpTwoTensorData()
+
+ computer = MSEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.5, qmap['test'])
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(1.0, qmax)
+
+ def test_MSE_different_tensors(self):
+ self._setUpDifferentTensorData()
+
+ computer = MSEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(0.0, qmax)
+
+ def test_TAE(self):
+ self._setUpSingleTensorData()
+
+ computer = TAEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+
+ def test_TAE_different_options(self):
+ self._setUpDifferentTensorData()
+
+ computer = TAEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.0, qmap['test'])
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(0.0, qmax)
+
+ def test_TAE_two(self):
+ self._setUpTwoTensorData()
+ computer = TAEComputer(self.fp32_dir.name, self.fq_dir.name)
+ qmap, qmin, qmax = computer.run()
+ self.assertAlmostEqual(0.0, qmin)
+ self.assertAlmostEqual(8.0, qmap['test'])
+ self.assertAlmostEqual(16.0, qmax)
+
+
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Test visqlib.Util module'''
+
+import unittest
+
+from visqlib.Util import to_filename
+from visqlib.Util import valid_attr
+from visqlib.Util import pretty_float
+
+
+class VisqUtilTest(unittest.TestCase):
+ def test_to_filename(self):
+ data = 'abc/d/e'
+ self.assertEqual('abc_d_e', to_filename(data))
+
+ long_data = 'x' * 300
+ self.assertEqual('x' * 255, to_filename(long_data))
+
+ def test_valid_attr(self):
+ class Test:
+ def __init__(self):
+ self.a = 'a'
+
+ test = Test()
+ self.assertTrue(valid_attr(test, 'a'))
+ self.assertFalse(valid_attr(test, 'b'))
+
+ def test_pretty_float(self):
+ test_configs = [0.123456, 12.3456, [0.123456], {'test': [0.123456]}]
+ three_digits_ans = [0.123, 12.346, [0.123], {'test': [0.123]}]
+ for test_data, ans in zip(test_configs, three_digits_ans):
+ res = pretty_float(test_data, ndigits=3)
+ self.assertEqual(res, ans)
+
+ test_configs = [0.123456, 12.3456, [0.123456], {'test': [0.123456]}]
+ four_digits_ans = [0.1235, 12.3456, [0.1235], {'test': [0.1235]}]
+ for test_data, ans in zip(test_configs, four_digits_ans):
+ res = pretty_float(test_data, ndigits=4)
+ self.assertEqual(res, ans)
+
+
+if __name__ == '__main__':
+ unittest.main()
--- /dev/null
+unset(VISQ_DEPS)
+
+###
+### Set up visq executable
+###
+set(VISQ_FILE "visq")
+set(VISQ_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${VISQ_FILE}")
+set(VISQ_BIN "${CMAKE_CURRENT_BINARY_DIR}/${VISQ_FILE}")
+
+add_custom_command(OUTPUT ${VISQ_BIN}
+ COMMAND ${CMAKE_COMMAND} -E copy "${VISQ_SRC}" "${VISQ_BIN}"
+ DEPENDS ${VISQ_SRC}
+ COMMENT "Generate ${VISQ_BIN}"
+)
+
+list(APPEND VISQ_DEPS ${VISQ_BIN})
+
+###
+### Set up visqlib directory
+###
+set(VISQ_PYTHON_DIR "visqlib")
+set(VISQ_PYTHON_DIR_BIN "${CMAKE_CURRENT_BINARY_DIR}/${VISQ_PYTHON_DIR}")
+
+add_custom_command(OUTPUT ${VISQ_PYTHON_DIR_BIN}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${VISQ_PYTHON_DIR_BIN}"
+ COMMENT "Generate ${VISQ_PYTHON_DIR_BIN}"
+)
+
+list(APPEND VISQ_DEPS ${VISQ_PYTHON_DIR_BIN})
+
+###
+### Set up Python files
+###
+# Python modules of visqlib that are copied next to the visq executable
+set(VISQ_PYTHON_FILES DumpFakeQuantFM.py
+                      DumpFP32FM.py
+                      Palette.py
+                      QErrorComputer.py
+                      DotBuilder.py
+                      Util.py)
+
+foreach(VISQ_PYTHON_FILE IN ITEMS ${VISQ_PYTHON_FILES})
+
+  set(VISQ_PYTHON_FILE_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${VISQ_PYTHON_DIR}/${VISQ_PYTHON_FILE}")
+  set(VISQ_PYTHON_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${VISQ_PYTHON_DIR}/${VISQ_PYTHON_FILE}")
+
+  # The copy must depend on the source file of this iteration so that an
+  # edited script is copied again. (VISQ_PYTHON_SRC was never defined, which
+  # left the rule without any dependency.)
+  add_custom_command(OUTPUT ${VISQ_PYTHON_FILE_BIN}
+    COMMAND ${CMAKE_COMMAND} -E copy "${VISQ_PYTHON_FILE_SRC}" "${VISQ_PYTHON_FILE_BIN}"
+    DEPENDS ${VISQ_PYTHON_FILE_SRC}
+    COMMENT "Generate ${VISQ_PYTHON_FILE_BIN}"
+  )
+
+  list(APPEND VISQ_DEPS ${VISQ_PYTHON_FILE_BIN})
+
+endforeach(VISQ_PYTHON_FILE)
+
+# Build every file collected in VISQ_DEPS as part of the default target
+add_custom_target(visq ALL DEPENDS ${VISQ_DEPS})
+
+# Install the visq script into bin (readable and executable by everyone)
+install(FILES ${VISQ_FILE}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION bin)
+
+# Install the visqlib directory into bin (files readable, not executable)
+install(DIRECTORY ${VISQ_PYTHON_DIR}
+ FILE_PERMISSIONS OWNER_WRITE OWNER_READ
+ GROUP_READ
+ WORLD_READ
+ DESTINATION bin)
--- /dev/null
+# visq
+
+_visq_ is a module to generate a json file used to visualize layer-wise quantization errors
+(https://github.com/Samsung/ONE/issues/9694).
+
+## Example
+```bash
+$ ./visq --fp32_circle sample.circle \
+ --q_circle sample.q.circle \
+ --data test.h5 \
+ --mpeir_output sample.mpeir.visq.json \
+ --mse_output sample.mse.visq.json \
+ --tae_output sample.tae.visq.json \
+ --dump_dot_graph
+```
+
+The above command will generate
+- `sample.mpeir.visq.json`: Json file that contains layer-wise mpeir.
+- `sample.mse.visq.json`: Json file that contains layer-wise mse.
+- `sample.tae.visq.json`: Json file that contains layer-wise tae.
+- `sample.mpeir.visq.json.dot`: Dot graph for layer-wise mpeir.
+- `sample.tae.visq.json.dot`: Dot graph for layer-wise tae.
+- `sample.mse.visq.json.dot`: Dot graph for layer-wise mse.
+
+## Quantization error metrics
+
+f: Result of fp32 model
+q: Result of quantized model
+
+- MPEIR: Mean Peak Error to Interval Ratio = Average(max(|f - q|) / (max(f) - min(f) + epsilon)),
+  where epsilon = 1e-6
+- MSE: Mean Squared Error = Average(square(f - q))
+- TAE: Total Absolute Error = Sum(|f - q|)
--- /dev/null
+require("dalgona")
+require("circle-quantizer")
--- /dev/null
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import subprocess
+import tempfile
+import json
+import os
+import math
+import sys
+
+import h5py as h5
+import numpy as np
+
+from shutil import copyfile
+from pathlib import Path
+
+from visqlib.Palette import YLORRD9Palette
+from visqlib.QErrorComputer import MPEIRComputer, MSEComputer, TAEComputer
+from visqlib.Util import valid_attr, pretty_float
+from visqlib.DotBuilder import DotBuilder
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='Command line tool to visualize layer-wise quantization errors')
+ parser.add_argument(
+ "-f",
+ "--fp32_circle",
+ type=str,
+ help="Path to the fp32 circle model.",
+ required=True)
+ parser.add_argument(
+ "-q",
+ "--q_circle",
+ type=str,
+ help="Path to the quantized circle model.",
+ required=True)
+ parser.add_argument(
+ "-d",
+ "--data",
+ type=str,
+ help=
+ "Path to the data used for inference. Random data will be used if this option is not given.",
+ required=False)
+ parser.add_argument(
+ "--mpeir_output",
+ type=str,
+ help="Path to the output json file (qerror metric = MPEIR).",
+ required=False)
+ parser.add_argument(
+ "--mse_output",
+ type=str,
+ help="Path to the output json file (qerror metric = MSE).",
+ required=False)
+ parser.add_argument(
+ "--tae_output",
+ type=str,
+ help="Path to the output json file (qerror metric = TAE).",
+ required=False)
+ parser.add_argument(
+ "--dump_dot_graph", action="store_true", help="Dump dot graph.", required=False)
+ parser.add_argument(
+ "-b",
+ "--batch_size",
+ type=int,
+ help="Batch size to process large datasets.",
+ required=False)
+
+ return parser
+
+
+def _verify_args(args):
+ """Verify the given arguments"""
+
+ valid_outputs = ['mpeir_output', 'mse_output', 'tae_output']
+
+ # Check if at least one output option is given
+ num_outputs = 0
+ for output_name in valid_outputs:
+ if valid_attr(args, output_name):
+ num_outputs += 1
+
+ if num_outputs == 0:
+ raise RuntimeError("At least one output should be given.")
+
+
+def _run_dalgona(model, data, analysis, save_dir):
+ dir_path = Path(__file__).parent.resolve()
+ dalgona_path = os.path.join(dir_path, 'dalgona')
+ cmd = [dalgona_path]
+ cmd += ['--input_model', str(model)]
+ cmd += ['--analysis', str(analysis)]
+ if data != None:
+ cmd += ['--input_data', str(data)]
+ cmd += ['--analysis_args', str(save_dir)]
+
+ try:
+ subprocess.run(cmd, capture_output=True, check=True)
+ except subprocess.CalledProcessError as e:
+ print('Error raised while running the below command')
+ print(' '.join(cmd))
+ print(e.output)
+ raise
+
+
+# Generate h5 file that contains a dataset of a single batch
+# This is for batch execution of visq
+def gen_batch_h5(inputs_data, inputs_path):
+ # Create h5 file
+ output_path = inputs_path + "/inputs.h5"
+ h5_file = h5.File(output_path, 'w')
+ group = h5_file.create_group("value")
+ group.attrs['desc'] = "Input data"
+
+ for i in range(len(inputs_data)):
+ sample = group.create_group(str(i))
+ for j in range(len(inputs_data[i])):
+ sample.create_dataset(str(j), data=inputs_data[i][j])
+
+ h5_file.close()
+ return output_path
+
+
+# Aggregate intermediate results for a given data
+def advance_on_data(fp32_model, fq_model, data, computers):
+
+ curr_dir = Path(__file__).parent.resolve()
+ dump_fp32_py = curr_dir / 'visqlib' / 'DumpFP32FM.py'
+ dump_fq_py = curr_dir / 'visqlib' / 'DumpFakeQuantFM.py'
+
+ with tempfile.TemporaryDirectory() as fp32_dir, \
+ tempfile.TemporaryDirectory() as fq_dir:
+
+ _run_dalgona(fp32_model, data, dump_fp32_py, fp32_dir)
+ copyfile(fp32_dir + '/tensors.txt', fq_dir + '/tensors.txt')
+ _run_dalgona(fq_model, data, dump_fq_py, fq_dir)
+
+ for metric_key in computers:
+ computers[metric_key][0].advance_on(fp32_dir, fq_dir)
+
+
+def _run_batch(fp32_model, fq_model, data, computers, batch_size):
+ with tempfile.TemporaryDirectory() as inputs_dir:
+ with h5.File(data, 'r') as f:
+ dataset = f['value']
+
+ inputs = []
+ for data_index in dataset:
+ cur_inputs = []
+ for input_index in dataset[data_index]:
+ d = dataset[data_index][input_index][:]
+ cur_inputs.append(np.array(d, np.float32))
+
+ inputs.append(cur_inputs)
+ if len(inputs) >= batch_size:
+ input_path = gen_batch_h5(inputs, inputs_dir)
+ advance_on_data(fp32_model, fq_model, input_path, computers)
+ inputs = []
+
+ if len(inputs) > 0:
+ input_path = gen_batch_h5(inputs, inputs_dir)
+ advance_on_data(fp32_model, fq_model, input_path, computers)
+
+
+def _fake_quantize(input_model, output_model):
+ dir_path = Path(__file__).parent.resolve()
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+ cmd = [circle_quantizer_path]
+ cmd += ['--fake_quantize']
+ cmd += [str(input_model)]
+ cmd += [str(output_model)]
+
+ try:
+ subprocess.run(cmd, check=True)
+ except subprocess.CalledProcessError as e:
+ print('Error raised while running the below command')
+ print(' '.join(cmd))
+ print(e.output)
+ raise
+
+
+# Recursively visit items and check if there is Infinity or NaN
+def _check_float(item):
+ if isinstance(item, dict):
+ for v in item.values():
+ _check_float(v)
+ if isinstance(item, list):
+ for v in item:
+ _check_float(v)
+ if isinstance(item, float):
+ if item == -float('inf') or item == float('inf'):
+ raise RuntimeError('Infinite value detected. Value must be float')
+ if math.isnan(item):
+ raise RuntimeError('NaN value detected. Value must be float')
+
+
+def _build_json(model, metric, colorscheme, error):
+    """Assemble the dictionary that is saved as the visq json result.
+
+    model: string
+    metric: string
+    colorscheme: list of {'b': begin, 'e': end, 'c': color}
+    error: dict {tensor_name: error}
+
+    Floats are rounded with pretty_float. An inf or NaN in the rounded
+    values makes _check_float raise RuntimeError.
+    """
+    result = {
+        "meta": {
+            "model": model,
+            "metric": metric,
+            "colorscheme": pretty_float(colorscheme),
+        },
+        # Why list? To support multiple subgraphs
+        "error": [pretty_float(error)],
+    }
+
+    # Invariants
+    _check_float(result["meta"]["colorscheme"])
+    _check_float(result["error"])
+    return result
+
+
+def _save_dot(circle_path: str, dot_path: str, metric: str, colors: list, qerror: dict):
+ # circle_path: Path to the circle model (required to build graph)
+ # dot_path: Path to the output dot file
+ # metric: Metric name (ex: MPEIR, MSE)
+ # colors: list [{'b': begin, 'e': end, 'c':color}, ..]
+ # qerror: dict {tensor_name (str) -> qerror (float)}
+ builder = DotBuilder(
+ circle_path=circle_path, dot_path=dot_path, metric=metric, colors=colors)
+
+ builder.save(qerror)
+
+
+def run_on_data_batchwise(fp32_model, q_model, data, dump_dot_graph, computers,
+ batch_size):
+
+ with tempfile.TemporaryDirectory() as model_dir:
+ fq_model = model_dir + '/fq_model.circle'
+
+ # Step 1. Fake quantize quantized circle model
+ _fake_quantize(q_model, fq_model)
+
+ # process the whole dataset batch by batch
+ _run_batch(fp32_model, fq_model, data, computers, batch_size)
+
+ #compute the final results
+ for metric_key in computers:
+ cur_computer = computers[metric_key][0]
+ output = computers[metric_key][1]
+ if metric_key == 'MPEIR':
+ qerror_map = cur_computer.get_final_result()
+ q_min = 0.0
+ q_max = 1.0
+ elif metric_key == 'MSE' or metric_key == 'TAE':
+ qerror_map, q_min, q_max = cur_computer.get_final_result()
+
+ palette = YLORRD9Palette(qerror_min=q_min, qerror_max=q_max)
+ result = _build_json(
+ metric=metric_key,
+ model=Path(fp32_model).name,
+ colorscheme=palette.colorscheme(),
+ error=qerror_map)
+ with open(output, "w") as f:
+ json.dump(result, f)
+
+ if dump_dot_graph:
+ _save_dot(
+ circle_path=fp32_model,
+ dot_path=output + '.dot',
+ metric=metric_key,
+ colors=palette.colorscheme(),
+ qerror=qerror_map)
+
+
+def run_on_data(fp32_model, q_model, data, dump_dot_graph, computers):
+ curr_dir = Path(__file__).parent.resolve()
+ dump_fp32_py = curr_dir / 'visqlib' / 'DumpFP32FM.py'
+ dump_fq_py = curr_dir / 'visqlib' / 'DumpFakeQuantFM.py'
+
+ with tempfile.TemporaryDirectory() as model_dir, \
+ tempfile.TemporaryDirectory() as fp32_dir, \
+ tempfile.TemporaryDirectory() as fq_dir:
+ fq_model = model_dir + '/fq_model.circle'
+
+ # Step 1. Fake quantize quantized circle model
+ _fake_quantize(q_model, fq_model)
+
+ # Step 2. Run dalgona to dump intermediate FMs in FP32 model
+ _run_dalgona(fp32_model, data, dump_fp32_py, fp32_dir)
+
+ # Copy list of dumped tensors
+ copyfile(fp32_dir + '/tensors.txt', fq_dir + '/tensors.txt')
+
+ # Step 3. Run dalgona to dump intermediate FMs in fq model
+ _run_dalgona(fq_model, data, dump_fq_py, fq_dir)
+
+ # Step 4. Read results and compute qerror
+ for metric_key in computers:
+ cur_computer = computers[metric_key][0]
+ output = computers[metric_key][1]
+ cur_computer.advance_on(fp32_dir, fq_dir)
+ if metric_key == 'MPEIR':
+ qerror_map = cur_computer.get_final_result()
+ q_min = 0.0
+ q_max = 1.0
+ elif metric_key == 'MSE' or metric_key == 'TAE':
+ qerror_map, q_min, q_max = cur_computer.get_final_result()
+
+ palette = YLORRD9Palette(qerror_min=q_min, qerror_max=q_max)
+ result = _build_json(
+ metric=metric_key,
+ model=Path(fp32_model).name,
+ colorscheme=palette.colorscheme(),
+ error=qerror_map)
+ with open(output, "w") as f:
+ json.dump(result, f)
+
+ if dump_dot_graph:
+ _save_dot(
+ circle_path=fp32_model,
+ dot_path=output + '.dot',
+ metric=metric_key,
+ colors=palette.colorscheme(),
+ qerror=qerror_map)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = parser.parse_args()
+ _verify_args(args)
+
+ fp32_model = args.fp32_circle
+ q_model = args.q_circle
+ data = None
+ if valid_attr(args, 'data'):
+ data = args.data
+ dump_dot_graph = args.dump_dot_graph
+ batch_size = None
+ if valid_attr(args, 'batch_size'):
+ batch_size = args.batch_size
+
+ computers = {}
+ if args.mpeir_output:
+ computers['MPEIR'] = (MPEIRComputer(None, None), args.mpeir_output)
+
+ if args.mse_output:
+ computers['MSE'] = (MSEComputer(None, None), args.mse_output)
+
+ if args.tae_output:
+ computers['TAE'] = (TAEComputer(None, None), args.tae_output)
+
+ if batch_size == None:
+ run_on_data(fp32_model, q_model, data, dump_dot_graph, computers)
+ else:
+ run_on_data_batchwise(fp32_model, q_model, data, dump_dot_graph, computers,
+ batch_size)
+
+
+if __name__ == '__main__':
+ try:
+ main()
+ except Exception as e:
+ prog_name = os.path.basename(__file__)
+ print(f"{prog_name}: {type(e).__name__}: " + str(e), file=sys.stderr)
+ sys.exit(255)
--- /dev/null
+# Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pydot
+import math
+
+from circle import Model
+
+from pathlib import Path
+
+
+# Return the name of the tensor
+def _tensor_name(graph, tid):
+ return graph.Tensors(tid).Name().decode('utf-8')
+
+
+# Return double-quoted string
+def _quote(string: str):
+ return '"' + string + '"'
+
+
+# Class to build dot graph from qerror_map
+class DotBuilder:
+ def __init__(self, circle_path: str, dot_path: str, metric: str, colors: str):
+ '''
+ circle_path: Path to the fp32 circle model (required to build graph)
+ dot_path: Path to the saved dot file
+ metric: Metric name (ex: MPEIR, MSE)
+ colors: List of color slots [{'b': begin, 'e': end, 'c':color}, ..]
+ '''
+ with open(circle_path, 'rb') as f:
+ self._model = Model.Model.GetRootAsModel(f.read())
+
+ if self._model.SubgraphsLength() != 1:
+ raise RuntimeError("Only one subgraph is supported")
+
+ self._name = Path(circle_path).name
+ self._dot_path = dot_path
+ self._metric = metric
+ self._colors = colors
+
+ # Return color (RGB) for the given qerror
+ def _get_color(self, qerror: float):
+ # Find a slot where qerror is in the range of [begin, end]
+ for slot in self._colors:
+ begin = slot['b']
+ end = slot['e']
+ if (qerror > begin or math.isclose(
+ qerror, begin)) and (qerror < end or math.isclose(qerror, end)):
+ return slot['c']
+
+ # Use the first color if qerror is smaller than the first begin
+ if qerror < self._colors[0]['b']:
+ return self._colors[0]['c']
+
+ # Use the last color if qerror is larger than the last end
+ if qerror > self._colors[-1]['e']:
+ return self._colors[-1]['c']
+
+ raise RuntimeError("Color ID not found. QError: " + str(qerror))
+
+ # Generate a pydot.Node object which represents the color table
+ def _gen_color_table(self):
+ color_table = "< <table>"
+ for slot in self._colors:
+ begin = slot['b']
+ end = slot['e']
+ color = slot['c']
+ color_table += "<tr> <td bgcolor=\""
+ color_table += color
+ color_table += "\">"
+ color_table += self._metric + ": {:.4f}".format(
+ begin) + " ~ " + "{:.4f}".format(end)
+ color_table += "</td> </tr>"
+ color_table += "</table> >"
+ return pydot.Node("color_table", shape='none', label=color_table)
+
+ # Save dot graph to self._dot_path
+ def save(self, qerror_map: dict):
+ '''
+ qerror_map: Dictionary of {op_name (str) -> qerror (float)}
+ '''
+ # Build graph
+ DOT = pydot.Dot(self._name, graph_type="digraph")
+
+ # Add color table
+ DOT.add_node(self._gen_color_table())
+
+ # Dictionary from output tensor name to Op name {str -> str}
+ # This dict is for handling Ops with multiple output tensors.
+ # We use the first output tensor's name as the Op name, following
+ # the implementation of luci IR
+ output_to_op = dict()
+
+ graph = self._model.Subgraphs(0)
+
+ # Add Input nodes
+ for i in range(graph.InputsLength()):
+ name = _tensor_name(graph, graph.Inputs(i))
+ output_to_op[name] = name
+ DOT.add_node(pydot.Node(_quote(name)))
+
+ # Add Output nodes
+ for i in range(graph.OutputsLength()):
+ name = _tensor_name(graph, graph.Outputs(i))
+ output_to_op[name] = name
+ DOT.add_node(pydot.Node(_quote(name)))
+
+ # Add Edges
+ for i in range(graph.OperatorsLength()):
+ op = graph.Operators(i)
+ # Name of the first output tensor
+ op_name = _tensor_name(graph, op.Outputs(0))
+ if op.OutputsLength() == 0:
+ print(op_name)
+ continue
+
+ if op_name in qerror_map:
+ qerror = qerror_map[op_name]
+ node = pydot.Node(
+ _quote(op_name),
+ style="filled",
+ fillcolor=self._get_color(qerror),
+ xlabel=self._metric + ": {:.4f}".format(qerror))
+ else:
+ # qerror_map does not have qerror info for the op. Color gray.
+ # When this happen? visq does not collect qerror info of some Ops
+ # For example, Reshape Op does not change values, so its qerror
+ # info is not collected.
+ node = pydot.Node(_quote(op_name), style="filled", fillcolor='gray')
+
+ DOT.add_node(node)
+
+ for output_idx in range(op.OutputsLength()):
+ output_name = _tensor_name(graph, op.Outputs(output_idx))
+ # Set Op name as the first output tensor name (op_name)
+ output_to_op[output_name] = op_name
+
+ for j in range(op.InputsLength()):
+ op_input = op.Inputs(j)
+
+ # Optional input case (ex: For TConv with no bias, bias is -1)
+ if op_input == -1:
+ continue
+
+ op_input_name = _tensor_name(graph, op_input)
+ if op_input_name not in output_to_op:
+ continue
+
+ # Use the saved name to handle multiple outputs
+ op_input_name = output_to_op[op_input_name]
+ DOT.add_edge(pydot.Edge(_quote(op_input_name), _quote(op_name)))
+
+ DOT.write(self._dot_path)
--- /dev/null
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script that dumps FM of FP32 model
+# NOTE This script runs on dalgona
+
+import numpy as np
+
+from pathlib import Path
+from Util import to_filename
+
+
+# Dump FP32 model's intermediate FM data and their names
+#
+# Before
+# self._dir/
+#
+# After
+# self._dir/
+# tensors.txt
+# <TENSOR_NAME>.npy
+# NOTE TENSOR_NAME is transformed by to_filename
+class DumpFP32FM:
+ def StartAnalysis(self, args):
+ self._dir = Path(args)
+ self._num_data = 0
+ self._tensor_names = set()
+
+ def EndNetworkExecution(self, outputs):
+ self._num_data += 1
+
+ def DefaultOpPost(self, name, opcode, inputs, output):
+ # Save intermediate FM into tensor_name.npy
+ data_path = self._dir / str(self._num_data)
+ data_path.mkdir(parents=False, exist_ok=True)
+ np.save(str(data_path / to_filename(name)), output['data'])
+ self._tensor_names.add(name)
+
+ def EndAnalysis(self):
+ # Save tensor names line by line
+ with open(self._dir / 'tensors.txt', 'w') as f:
+ for name in self._tensor_names:
+ f.write("%s\n" % name)
--- /dev/null
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script that dumps dequantized FM
+# NOTE This script runs on dalgona
+
+import numpy as np
+
+from pathlib import Path
+from Util import to_filename
+
+# Fake-quantized Op has the postfix of fq_postfix
+# TODO Remove coupling with fake quantization codes
+fq_postfix = '_FQ_Quantize_FQ_Dequantize'
+
+
+# Return the original name before fake quantization
+# Return None if name is not from fake quantization (Dequantize Op in original model)
+# TODO Handle the case when the original node's name contains fq_postfix
+def _name_before_fq(name):
+ if not name.endswith(fq_postfix):
+ return None
+
+ return name[0:name.find(fq_postfix)]
+
+
+# Dump fake-quantized model's intermediate FM data according to tensors.txt
+#
+# Before
+# self._dir/
+# tensors.txt
+#
+# After
+# self._dir/
+# tensors.txt
+# <TENSOR_NAME>.npy
+# NOTE TENSOR_NAME is transformed by to_filename
+class DumpFakeQuantFM:
+ def StartAnalysis(self, args):
+ self._dir = Path(args)
+ self._num_data = 0
+ with open(self._dir / 'tensors.txt') as f:
+ self._target_tensors = set([line.rstrip() for line in f])
+
+ def EndNetworkExecution(self, outputs: list):
+ self._num_data += 1
+
+ # TODO Use DequantizePost when dalgona supports it
+ def DefaultOpPost(self, name, opcode, inputs, output):
+ if opcode == 'Dequantize':
+ orig_name = _name_before_fq(name)
+ if orig_name in self._target_tensors:
+ data_path = self._dir / str(self._num_data)
+ data_path.mkdir(parents=False, exist_ok=True)
+ np.save(str(data_path / to_filename(orig_name)), output['data'])
--- /dev/null
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Class to save colorscheme
+class Palette:
+ # Child class must implement __init__ to fill the below members
+ def __init__(self):
+ # Element of self._slots has [lower bound, upper bound] of qerrors to decide a color
+ self._slots = []
+ # Element of self._colors has rgb values in string format
+ self._colors = []
+ raise NotImplementedError('Child class must implement __init__')
+
+ # Return color scheme as a list of objects
+ # Each object has the following attributes
+ # b: begin qerror
+ # e: end qerror
+ # c: color (in RGB string)
+ def colorscheme(self):
+ cs = []
+ for slot, color in zip(self._slots, self._colors):
+ cs.append({"b": slot[0], "e": slot[1], "c": color})
+ return cs
+
+
+# Ranges of slots are defined by qerror_min/qerror_max
+# Each slot has a uniform range
+# For example, if qerror_min = 0.0, qerror_max = 1.0, number of colors = 10
+# Ranges of slots will be as follows.
+# [0.0, 0.1], [0.1, 0.2], [0.2, 0.3] ... [0.8, 0.9], [0.9, 1.0]
+class UniformPalette(Palette):
+ def __init__(self, qerror_min, qerror_max, colors):
+ self._colors = colors
+ self._slots = []
+ qerror_range = qerror_max - qerror_min
+ num_colors = len(self._colors)
+ for i in range(num_colors):
+ lower_bound = qerror_min + i * (qerror_range / num_colors)
+ upper_bound = qerror_min + (i + 1) * (qerror_range / num_colors)
+
+ self._slots.append([lower_bound, upper_bound])
+
+ # Invariant
+ assert len(self._slots) == num_colors
+
+
+# Palette for ylorrd9 colorscheme
+class YLORRD9Palette(UniformPalette):
+ def __init__(self, qerror_min, qerror_max):
+ if qerror_min >= qerror_max:
+ raise RuntimeError('min must be less than max')
+
+ # From https://colorbrewer2.org/#type=sequential&scheme=YlOrRd&n=9
+ colors = [
+ "#ffffcc", "#ffeda0", "#fed976", "#feb24c", "#fd8d3c", "#fc4e2a", "#e31a1c",
+ "#bd0026", "#800026"
+ ]
+ super().__init__(qerror_min, qerror_max, colors)
--- /dev/null
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import glob
+import numpy as np
+
+from pathlib import Path
+from visqlib.Util import to_filename
+
+
+class QErrorComputer:
+ def __init__(self, fp32_dir, fq_dir):
+ self._fp32_dir = fp32_dir
+ self._fq_dir = fq_dir
+ self.qerror_map = dict()
+ self._num_processed_data = 0
+
+ def collect_data_path(self, fp32_dir, fq_dir):
+ # Assumption: FM data are saved as follows
+ #
+ # fp32_dir/
+ # tensors.txt
+ # <DATA_INDEX>/
+ # <TENSOR_NAME>.npy
+ #
+ # fq_dir/
+ # tensors.txt
+ # <DATA_INDEX>/
+ # <TENSOR_NAME>.npy
+ self._num_data = len(list(filter(os.path.isdir, glob.glob(fp32_dir + '/*'))))
+ if self._num_data != len(list(filter(os.path.isdir, glob.glob(fq_dir + '/*')))):
+ raise RuntimeError("Number of data mistmatches")
+
+ self._num_processed_data += self._num_data
+
+ self._filename_to_tensor = dict()
+ with open(Path(fp32_dir) / 'tensors.txt') as f:
+ tensors = set([line.rstrip() for line in f])
+ for tensor in tensors:
+ # Check if filename is unique
+ # Fix name finding logic unless
+ assert to_filename(tensor) not in self._filename_to_tensor
+ self._filename_to_tensor[to_filename(tensor)] = tensor
+
+ # Save paths to fp32 data and fq data for each tensor
+ # dict
+ # {
+ # <tensor_name>: (fp32_path, fq_path),
+ # <tensor_name>: (fp32_path, fq_path),
+ # ...
+ # }
+ data_paths = dict()
+ for data_idx in range(self._num_data):
+ fp32_results = glob.glob(fp32_dir + '/' + str(data_idx) + '/*.npy')
+ for fp32_data_path in fp32_results:
+ fp32_path = Path(fp32_data_path)
+ fq_data_path = fq_dir + '/' + str(data_idx) + '/' + fp32_path.with_suffix(
+ '.npy').name
+ fq_path = Path(fq_data_path)
+ filename = fp32_path.stem
+ tensor_name = self._filename_to_tensor[filename]
+
+ # Only save the tensors which have both fp32 data and fq data
+ if fq_path.is_file() and fp32_path.is_file():
+ if tensor_name in data_paths:
+ data_paths[tensor_name].append((fp32_data_path, fq_data_path))
+ else:
+ data_paths[tensor_name] = [(fp32_data_path, fq_data_path)]
+
+ return data_paths
+
+ def run(self):
+ '''Return qerror map (dict: tensor_name(string) -> qerror(float)).'''
+ raise NotImplementedError # Child must implement this
+
+
+class MPEIRComputer(QErrorComputer):
+ def __init__(self, fp32_dir, fq_dir):
+ super().__init__(fp32_dir, fq_dir)
+
+ # Incrementally compute Qerror while traversing all data in fp32_dir and fq_dir
+ def advance_on(self, fp32_dir, fq_dir):
+ data_paths = self.collect_data_path(fp32_dir, fq_dir)
+ for tensor_name, data_path in data_paths.items():
+ for (fp32_data_path, fq_data_path) in data_path:
+ fp32_data = np.load(fp32_data_path)
+ fq_data = np.load(fq_data_path)
+
+ diff = np.absolute(fp32_data - fq_data).reshape(-1)
+
+ fp32_min = np.min(fp32_data.reshape(-1))
+ fp32_max = np.max(fp32_data.reshape(-1))
+
+ # Peak Error-to-Interval Ratio (PEIR)
+ # NOTE: PEIR is an analogue of PSNR (Peak Signal to Noise Ratio)
+ PEAK_ERROR = np.max(diff)
+ INTERVAL = fp32_max - fp32_min
+
+ # If INTERVAL is 0, PEIR becomes NaN.
+ # To prevent this, relaxed PEIR with epsilon(10^(-6)) is used.
+ rPEIR = PEAK_ERROR / (INTERVAL + 0.000001)
+
+ if tensor_name in self.qerror_map:
+ self.qerror_map[tensor_name] += rPEIR
+ else:
+ self.qerror_map[tensor_name] = rPEIR
+
+ def get_final_result(self):
+ qerror_map = dict()
+ for tensor_name, acc in self.qerror_map.items():
+ qerror_map[tensor_name] = acc / self._num_processed_data
+
+ return qerror_map
+
+ def run(self):
+ self.advance_on(self._fp32_dir, self._fq_dir)
+ return self.get_final_result()
+
+
+class MSEComputer(QErrorComputer):
+ def __init__(self, fp32_dir, fq_dir):
+ super().__init__(fp32_dir, fq_dir)
+ self.qerror_min = float('inf')
+ self.qerror_max = -self.qerror_min
+
+ # Incrementally compute Qerror while traversing all data in fp32_dir and fq_dir
+ def advance_on(self, fp32_dir, fq_dir):
+ data_paths = self.collect_data_path(fp32_dir, fq_dir)
+ for tensor_name, data_path in data_paths.items():
+ for (fp32_data_path, fq_data_path) in data_path:
+ fp32_data = np.load(fp32_data_path)
+ fq_data = np.load(fq_data_path)
+
+ MSE = np.square(fp32_data - fq_data).mean()
+
+ if tensor_name in self.qerror_map:
+ self.qerror_map[tensor_name] += MSE
+ else:
+ self.qerror_map[tensor_name] = MSE
+
+ self.qerror_min = min(MSE, self.qerror_min)
+ self.qerror_max = max(MSE, self.qerror_max)
+
+ def get_final_result(self):
+ qerror_map = dict()
+ for tensor_name, acc in self.qerror_map.items():
+ qerror_map[tensor_name] = acc / self._num_processed_data
+
+ return qerror_map, self.qerror_min, self.qerror_max
+
+ def run(self):
+ self.advance_on(self._fp32_dir, self._fq_dir)
+ return self.get_final_result()
+
+
+class TAEComputer(QErrorComputer):  #total absolute error
+    '''Total Absolute Error per tensor, plus the min/max TAE seen on any data.'''
+
+    def __init__(self, fp32_dir, fq_dir):
+        super().__init__(fp32_dir, fq_dir)
+        self.total_error = 0
+        # Start from an empty range so the first value sets both bounds
+        self.qerror_min = float('inf')
+        self.qerror_max = -self.qerror_min
+
+    def advance_on(self, fp32_dir, fq_dir):
+        collected = self.collect_data_path(fp32_dir, fq_dir)
+        for tensor_name, path_pairs in collected.items():
+            for fp32_data_path, fq_data_path in path_pairs:
+                fp32_data = np.load(fp32_data_path)
+                fq_data = np.load(fq_data_path)
+
+                abs_diff = np.abs(fp32_data - fq_data)
+                tae = np.sum(abs_diff)
+
+                if tensor_name not in self.qerror_map:
+                    self.qerror_map[tensor_name] = tae
+                else:
+                    self.qerror_map[tensor_name] += tae
+
+                # min/max track the per-data values, not the averaged ones
+                self.qerror_min = min(tae, self.qerror_min)
+                self.qerror_max = max(tae, self.qerror_max)
+
+    def get_final_result(self):
+        # The accumulated error is divided by the number of processed data
+        averaged = {
+            tensor_name: acc / self._num_processed_data
+            for tensor_name, acc in self.qerror_map.items()
+        }
+        return averaged, self.qerror_min, self.qerror_max
+
+    def run(self):
+        self.advance_on(self._fp32_dir, self._fq_dir)
+        return self.get_final_result()
--- /dev/null
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Change tensor name into the one compatible with Linux file system
+# '/' is replaced with '_'
+# Too long name is sliced to 255 characters
+def to_filename(tensor_name):
+ assert isinstance(tensor_name, str)
+ return tensor_name.replace('/', '_')[-255:]
+
+
+# Check if attr is valid
+def valid_attr(args, attr):
+ return hasattr(args, attr) and getattr(args, attr)
+
+
+# Recursively visit items and round floats with ndigits
+def pretty_float(item, ndigits=4):
+ if isinstance(item, dict):
+ return {k: pretty_float(v, ndigits) for k, v in item.items()}
+ if isinstance(item, list):
+ return [pretty_float(x, ndigits) for x in item]
+ if isinstance(item, float):
+ return round(item, ndigits)
+ return item
target_link_libraries(${TEST_CKER} gtest gtest_main ${LIB_PTHREAD})
add_test(${TEST_CKER} ${TEST_CKER})
-install(TARGETS ${TEST_CKER} DESTINATION unittest_standalone)
+install(TARGETS ${TEST_CKER} DESTINATION unittest)
}
}
+// Dequantize int16 data into float: output = scale * (input - zero_point).
+// The number of elements is taken from MatchingFlatSize(input_shape, output_shape).
+inline void Dequantize(const Shape &input_shape, const int16_t *input_data,
+ const Shape &output_shape, float *output_data, const float scale,
+ const int32_t zero_point)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+ int i = 0;
+#ifdef USE_NEON
+ // Vector path: handles 8 elements per iteration as two groups of 4
+ const float32x4_t scale_dup = vdupq_n_f32(static_cast<float>(scale));
+ const float32x4_t zero_times_scale_dup = vdupq_n_f32(static_cast<float>(-zero_point * scale));
+ for (; i <= flat_size - 8; i += 8)
+ {
+ const int16x4_t input_s16_low = vld1_s16(input_data + i);
+ const int16x4_t input_s16_high = vld1_s16(input_data + i + 4);
+ // Widen int16 lanes to int32
+ const int32x4_t val_low = vmovl_s16(input_s16_low);
+ const int32x4_t val_high = vmovl_s16(input_s16_high);
+
+ float32x4_t result_low, result_high;
+ // NOTE(review): presumably computes val * scale + (-zero_point * scale);
+ // confirm against ScaleWithNewZeroPoint
+ ScaleWithNewZeroPoint(val_low, scale_dup, zero_times_scale_dup, &result_low);
+ ScaleWithNewZeroPoint(val_high, scale_dup, zero_times_scale_dup, &result_high);
+
+ vst1q_f32(output_data + i, result_low);
+ vst1q_f32(output_data + i + 4, result_high);
+ }
+#endif // NEON
+ // Scalar path: all elements without NEON, or the remaining tail with NEON
+ for (; i < flat_size; ++i)
+ {
+ const int32_t val = input_data[i];
+ const float result = static_cast<float>(scale * (val - zero_point));
+ output_data[i] = result;
+ }
+}
+
} // namespace cker
} // namespace nnfw
namespace cker
{
+/**
+ * @brief Internal scalar_logistic_op operation struct
+ *
+ * @note Recent Eigen3 scalar_logistic_op returns an invalid value on ARM32 when
+ *       the float input value is 88 (expected: 1, actual: 0).
+ *       As a workaround, we use the old version of the scalar_logistic_op internal struct.
+ *       TODO Remove this workaround
+ */
+template <typename T> struct scalar_logistic_op
+{
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
+
+  // Scalar path: 1 / (1 + exp(-x))
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T &x) const
+  {
+    const T one = T(1);
+    return one / (one + Eigen::numext::exp(-x));
+  }
+
+  // Packet path: same formula expressed with Eigen packet primitives.
+  // The primitives are qualified with Eigen::internal (like pset1) so that name lookup
+  // does not rely on ADL from inside nnfw::cker.
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet &x) const
+  {
+    const Packet one = Eigen::internal::pset1<Packet>(T(1));
+    const Packet exp_neg_x = Eigen::internal::pexp(Eigen::internal::pnegate(x));
+    return Eigen::internal::pdiv(one, Eigen::internal::padd(one, exp_neg_x));
+  }
+};
+
inline void Logistic(const Shape &input_shape, const float *input_data, const Shape &output_shape,
float *output_data)
{
auto input_map = MapAsVector(input_data, input_shape);
auto output_map = MapAsVector(output_data, output_shape);
- output_map.array() = input_map.array().unaryExpr(Eigen::internal::scalar_logistic_op<float>());
+
+ // Apply the logistic function element-wise using nnfw::cker::scalar_logistic_op
+ // (the old version of the op) instead of Eigen::internal::scalar_logistic_op
+ output_map.array() = input_map.array().unaryExpr(nnfw::cker::scalar_logistic_op<float>());
}
} // namespace cker
#ifndef __NNFW_CKER_QUANTIZE_H__
#define __NNFW_CKER_QUANTIZE_H__
+#include "cker/operation/Round.h"
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
}
}
+/**
+ * @brief Quantize float data to int8: output = clamp(round(input / scale) + zero_point)
+ *
+ * The result is clamped to the representable range of int8_t.
+ * With USE_NEON, 8 elements are processed per iteration and the remaining
+ * elements are handled by the scalar loop.
+ *
+ * NOTE(review): the NEON path multiplies by (1.0f / scale) and rounds with RoundToNearest(),
+ * while the scalar loop divides by scale and uses round(); confirm both paths agree
+ * for the inputs that matter.
+ */
+template <>
+inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ int8_t *output_data, const float scale, const int32_t zero_point)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ static constexpr int32_t min_val = std::numeric_limits<int8_t>::min();
+ static constexpr int32_t max_val = std::numeric_limits<int8_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ // Loop-invariant vectors: reciprocal of scale, zero point and clamp bounds
+ const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
+ const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
+ const int32x4_t min_val_dup = vdupq_n_s32(min_val);
+ const int32x4_t max_val_dup = vdupq_n_s32(max_val);
+
+ for (; i <= flat_size - 8; i += 8)
+ {
+ const float *src_data_ptr = input_data + i;
+ float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
+ float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);
+
+ input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
+ input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);
+
+ int32x4_t casted_val_0 = RoundToNearest(input_val_0);
+ int32x4_t casted_val_1 = RoundToNearest(input_val_1);
+
+ casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
+ casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);
+
+ // Clamp the values to fit the target type's range.
+ casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
+ casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
+ casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
+ casted_val_1 = vminq_s32(casted_val_1, max_val_dup);
+
+ // Narrow 32 -> 16 -> 8 bits; values were clamped to the int8 range above
+ const int16x4_t narrowed_val_0 = vmovn_s32(casted_val_0);
+ const int16x4_t narrowed_val_1 = vmovn_s32(casted_val_1);
+ const int16x8_t combined_val = vcombine_s16(narrowed_val_0, narrowed_val_1);
+ const int8x8_t combined_val_narrowed = vmovn_s16(combined_val);
+ vst1_s8(output_data + i, combined_val_narrowed);
+ }
+#endif // NEON
+
+ // Scalar path: all elements without NEON, or the remaining (< 8) elements with NEON
+ for (; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
+ const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+ output_data[i] = clamped;
+ }
+}
+
+/**
+ * @brief Quantize float data to uint8: output = clamp(round(input / scale) + zero_point)
+ *
+ * The result is clamped to the representable range of uint8_t.
+ * With USE_NEON, 8 elements are processed per iteration and the remaining
+ * elements are handled by the scalar loop.
+ *
+ * NOTE(review): the NEON path multiplies by (1.0f / scale) and rounds with RoundToNearest(),
+ * while the scalar loop divides by scale and uses round(); confirm both paths agree
+ * for the inputs that matter.
+ */
+template <>
+inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ uint8_t *output_data, const float scale, const int32_t zero_point)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ static constexpr int32_t min_val = std::numeric_limits<uint8_t>::min();
+ static constexpr int32_t max_val = std::numeric_limits<uint8_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ // Loop-invariant vectors: reciprocal of scale, zero point and clamp bounds
+ const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
+ const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
+ const int32x4_t min_val_dup = vdupq_n_s32(min_val);
+ const int32x4_t max_val_dup = vdupq_n_s32(max_val);
+
+ for (; i <= flat_size - 8; i += 8)
+ {
+ const float *src_data_ptr = input_data + i;
+ float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
+ float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);
+
+ input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
+ input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);
+
+ int32x4_t casted_val_0 = RoundToNearest(input_val_0);
+ int32x4_t casted_val_1 = RoundToNearest(input_val_1);
+
+ casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
+ casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);
+
+ // Clamp the values to fit the target type's range.
+ casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
+ casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
+ casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
+ casted_val_1 = vminq_s32(casted_val_1, max_val_dup);
+
+ // Narrow signed 32 -> unsigned 16 -> unsigned 8 bits; values were clamped to the
+ // uint8 range above
+ const uint16x4_t narrowed_val_0 = vqmovun_s32(casted_val_0);
+ const uint16x4_t narrowed_val_1 = vqmovun_s32(casted_val_1);
+ const uint16x8_t combined_val = vcombine_u16(narrowed_val_0, narrowed_val_1);
+ const uint8x8_t combined_val_narrowed = vmovn_u16(combined_val);
+ vst1_u8(output_data + i, combined_val_narrowed);
+ }
+#endif // NEON
+
+ // Scalar path: all elements without NEON, or the remaining (< 8) elements with NEON
+ for (; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
+ const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+ output_data[i] = clamped;
+ }
+}
+
+/**
+ * @brief Quantize float data to int16: output = clamp(round(input / scale) + zero_point)
+ *
+ * The result is clamped to the representable range of int16_t.
+ * With USE_NEON, 8 elements are processed per iteration and the remaining
+ * elements are handled by the scalar loop.
+ *
+ * NOTE(review): the NEON path multiplies by (1.0f / scale) and rounds with RoundToNearest(),
+ * while the scalar loop divides by scale and uses round(); confirm both paths agree
+ * for the inputs that matter.
+ */
+template <>
+inline void Quantize(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+ int16_t *output_data, const float scale, const int32_t zero_point)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ static constexpr int32_t min_val = std::numeric_limits<int16_t>::min();
+ static constexpr int32_t max_val = std::numeric_limits<int16_t>::max();
+
+ int i = 0;
+#ifdef USE_NEON
+ // Loop-invariant vectors: reciprocal of scale, zero point and clamp bounds
+ const float32x4_t reverse_scale_dup = vdupq_n_f32(1.0f / scale);
+ const int32x4_t zero_point_dup = vdupq_n_s32(zero_point);
+ const int32x4_t min_val_dup = vdupq_n_s32(min_val);
+ const int32x4_t max_val_dup = vdupq_n_s32(max_val);
+
+ for (; i <= flat_size - 8; i += 8)
+ {
+ const float *src_data_ptr = input_data + i;
+ float32x4_t input_val_0 = vld1q_f32(src_data_ptr);
+ float32x4_t input_val_1 = vld1q_f32(src_data_ptr + 4);
+
+ input_val_0 = vmulq_f32(input_val_0, reverse_scale_dup);
+ input_val_1 = vmulq_f32(input_val_1, reverse_scale_dup);
+
+ int32x4_t casted_val_0 = RoundToNearest(input_val_0);
+ int32x4_t casted_val_1 = RoundToNearest(input_val_1);
+
+ casted_val_0 = vaddq_s32(casted_val_0, zero_point_dup);
+ casted_val_1 = vaddq_s32(casted_val_1, zero_point_dup);
+
+ // Clamp the values to fit the target type's range.
+ casted_val_0 = vmaxq_s32(casted_val_0, min_val_dup);
+ casted_val_1 = vmaxq_s32(casted_val_1, min_val_dup);
+ casted_val_0 = vminq_s32(casted_val_0, max_val_dup);
+ casted_val_1 = vminq_s32(casted_val_1, max_val_dup);
+
+ // Narrow 32 -> 16 bits and store each half; values were clamped to the int16 range above
+ const int16x4_t narrowed_val_0 = vmovn_s32(casted_val_0);
+ const int16x4_t narrowed_val_1 = vmovn_s32(casted_val_1);
+ vst1_s16(output_data + i, narrowed_val_0);
+ vst1_s16(output_data + i + 4, narrowed_val_1);
+ }
+#endif // NEON
+
+ // Scalar path: all elements without NEON, or the remaining (< 8) elements with NEON
+ for (; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const int32_t unclamped = static_cast<int32_t>(round(val / scale)) + zero_point;
+ const int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+ output_data[i] = clamped;
+ }
+}
+
inline void Quantize(const int32_t *multiplier, const int32_t *shift, int32_t channel_size,
int32_t total_size, int32_t output_zp, int32_t output_min, int32_t output_max,
int32_t *scratch, int8_t *output)
}
// Calculate mean by dividing output_data by num of aggregated element.
- U num_elements_in_axis = 1;
+ size_t num_elements_in_axis = 1;
for (int idx = 0; idx < num_resolved_axis; ++idx)
{
size_t current = static_cast<size_t>(input_shape.Dims(resolved_axis_data()[idx]));
// Overflow prevention.
- if (current > static_cast<size_t>(std::numeric_limits<U>::max() / num_elements_in_axis))
+ if (current > static_cast<size_t>(std::numeric_limits<size_t>::max() / num_elements_in_axis))
{
return false;
}
if (compute_sum)
{
// TODO(b/116341117): Eliminate float and do this completely in 8bit.
- const float bias = -input_zero_point * scale * num_elements_in_axis + 0.5f;
+ const float bias = -input_zero_point * scale * num_elements_in_axis;
for (size_t idx = 0; idx < num_outputs; ++idx)
{
const U value =
}
else
{
- const float bias = -input_zero_point * scale + 0.5f;
+ const float bias = -input_zero_point * scale;
for (size_t idx = 0; idx < num_outputs; ++idx)
{
float float_mean =
#define __NNFW_CKER_ROUND_H__
#include "cker/Shape.h"
+#include "cker/Utils.h"
#include <cmath>
}
}
+#ifdef USE_NEON
+
+/**
+ * @brief Convert four floats to int32 with round-to-nearest
+ *
+ * When __aarch64__ or __SSSE3__ is defined this maps directly to vcvtnq_s32_f32.
+ * Otherwise it is emulated: +0.5 is added to non-negative lanes and -0.5 to
+ * negative lanes, then the sum is converted with vcvtq_s32_f32.
+ *
+ * NOTE(review): the two paths presumably differ on exact .5 ties
+ * (ties-to-even vs. ties-away-from-zero) - confirm against the Arm intrinsics reference.
+ */
+inline int32x4_t RoundToNearest(const float32x4_t input)
+{
+#if defined(__aarch64__) || defined(__SSSE3__)
+ // Note: vcvtnq_s32_f32 is not available in ARMv7
+ return vcvtnq_s32_f32(input);
+#else
+ static const float32x4_t zero_val_dup = vdupq_n_f32(0.0f);
+ static const float32x4_t point5_val_dup = vdupq_n_f32(0.5f);
+ static const float32x4_t minus_point5_val_dup = vdupq_n_f32(-0.5f);
+
+ // mask selects lanes where input < 0; those get -0.5, the others +0.5
+ const uint32x4_t mask = vcltq_f32(input, zero_val_dup);
+ const float32x4_t round = vbslq_f32(mask, minus_point5_val_dup, point5_val_dup);
+ return vcvtq_s32_f32(vaddq_f32(input, round));
+#endif // defined(__aarch64__) || defined(__SSSE3__)
+}
+
+#endif // NEON
+
inline void Round(const Shape &input_shape, const float *input_data, const Shape &output_shape,
float *output_data)
{
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.21.0'
+release = '1.22.0'
# -- General configuration ---------------------------------------------------
python3 \
python3-pip \
python3-venv \
+python3.8 \
+python3.8-dev \
+python3.8-venv \
scons \
software-properties-common \
unzip \
### Test
+Prerequisites for testing on an ARM32 device:
+```
+# numpy is required for value match in ARM32 target device
+sudo apt-get install python3-pip
+python3 -m pip install numpy
+```
+
You can also run unit tests in ARM32 Ubuntu device with cross build results.
First you need to run the test in host to prepare files that are currently
complicated in target device.
This document describes how to build runtime with GBS for Tizen AARCH64.
As a real example, we'll also describe how to prepare Tizen on Raspberry Pi 4
-and show you how to run our test package runner `nnpackage_run`.
+and show you how to run our test package runner `onert_run`.
For ARM32, there would be not much difference with some changes.
$ gbs -c infra/nnfw/config/gbs.conf build --include-all -A aarch64 --define 'test_build 1'
```
- `-A aarch64` is to set architecture to AARCH64. Use `arm32` for ARM32 target.
-- `--define 'test_build 1'` is to enable test build so that we can use `nnpackage_run`
+- `--define 'test_build 1'` is to enable test build so that we can use `onert_run`
Now take a cup of coffee.
`/opt/usr/home/owner/media/models` folder with `sdb` command.
```
-sh-3.2# BACKENDS="cpu" Product/out/bin/nnpackage_run \
+sh-3.2# BACKENDS="cpu" Product/out/bin/onert_run \
--nnpackage /opt/usr/home/owner/media/models/mobilenet_v2_1.4_224
Package Filename /opt/usr/home/owner/media/models/mobilenet_v2_1.4_224
```
$ export BUILD_TYPE=release
-$ make -f Makefile.template install
+$ make -f Makefile.template
```
Or you can simply do something like this:
```
-$ BUILD_TYPE=release make -f Makefile.template install
+$ BUILD_TYPE=release make -f Makefile.template
```
The build method described here is a `native build` in which the build environment and execution environment are same. So, this command creates a runtime binary targeting the current build architecture, probably x86_64, as the execution environment. You can find the build output in the ./Product folder as follows:
```
$ tree -L 2 ./Product
./Product
-├── obj -> /home/sjlee/star/one/Product/x86_64-linux.release/obj
├── out -> /home/sjlee/star/one/Product/x86_64-linux.release/out
└── x86_64-linux.release
- ├── BUILD
- ├── CONFIGURE
- ├── INSTALL
├── obj
└── out
$ tree -L 3 ./Product/out
./Product/out
├── bin
-│ ├── nnapi_test
-│ ├── nnpackage_run
+│ ├── onert_run
│ ├── tflite_comparator
│ └── tflite_run
├── include
│ ├── nnfw
-│ │ ├── NeuralNetworks.h
│ │ ├── NeuralNetworksEx.h
│ │ ├── NeuralNetworksExtensions.h
-│ │ ├── nnfw.h
-│ │ └── nnfw_experimental.h
+│ │ ├── NeuralNetworks.h
+│ │ ├── nnfw_experimental.h
+│ │ └── nnfw.h
│ └── onert
│ ├── backend
│ ├── compiler
│ └── util
├── lib
│ ├── libbackend_cpu.so
-│ ├── libcircle_loader.so
+│ ├── libbackend_ruy.so
│ ├── libneuralnetworks.so
│ ├── libnnfw-dev.so
-│ ├── libonert_core.so
-│ └── libtflite_loader.so
+│ └── libonert_core.so
+├── nnapi-gtest
+│ ├── nnapi_gtest
+│ ├── nnapi_gtest.skip
+│ ├── nnapi_gtest.skip.noarch.interp
+│ └── nnapi_gtest.skip.x86_64-linux.cpu
├── test
-│ ├── FillFrom_runner
│ ├── command
│ │ ├── nnpkg-test
│ │ ├── prepare-model
│ │ ├── unittest
│ │ └── verify-tflite
+│ ├── FillFrom_runner
│ ├── list
│ │ ├── benchmark_nnpkg_model_list.txt
-│ │ ├── frameworktest_list.aarch64.acl_cl.txt
-│ │ ├── frameworktest_list.aarch64.acl_neon.txt
-│ │ ├── frameworktest_list.aarch64.cpu.txt
-│ │ ├── frameworktest_list.armv7l.acl_cl.txt
-│ │ ├── frameworktest_list.armv7l.acl_neon.txt
-│ │ ├── frameworktest_list.armv7l.cpu.txt
-│ │ ├── frameworktest_list.noarch.interp.txt
-│ │ ├── frameworktest_list.x86_64.cpu.txt
│ │ ├── nnpkg_test_list.armv7l-linux.acl_cl
│ │ ├── nnpkg_test_list.armv7l-linux.acl_neon
│ │ ├── nnpkg_test_list.armv7l-linux.cpu
│ │ ├── nnpkg_test_list.noarch.interp
-│ │ ├── tflite_loader_list.aarch64.txt
-│ │ └── tflite_loader_list.armv7l.txt
+│ │ ├── tflite_comparator.aarch64.acl_cl.list
+│ │ ├── tflite_comparator.aarch64.acl_neon.list
+│ │ ├── tflite_comparator.aarch64.cpu.list
+│ │ ├── tflite_comparator.armv7l.acl_cl.list
+│ │ ├── tflite_comparator.armv7l.acl_neon.list
+│ │ ├── tflite_comparator.armv7l.cpu.list
+│ │ ├── tflite_comparator.noarch.interp.list
+│ │ └── tflite_comparator.x86_64.cpu.list
│ ├── models
-│ │ ├── nnfw_api_gtest
│ │ ├── run_test.sh
│ │ └── tflite
│ ├── nnpkgs
│ │ └── FillFrom
│ └── onert-test
-├── unittest
-│ ├── nnapi_gtest
-│ ├── nnapi_gtest.skip
-│ ├── nnapi_gtest.skip.noarch.interp
-│ └── nnapi_gtest.skip.x86_64-linux.cpu
-└── unittest_standalone
+└── unittest
+ ├── ndarray_test
├── nnfw_api_gtest
- ├── test_compute
- ├── test_onert
- ├── test_onert_backend_cpu_common
+ ├── nnfw_api_gtest_models
+ │ ├── add
+ │ ├── add_invalid_manifest
+ │ ├── add_no_manifest
+ │ ├── if_dynamic
+ │ ├── mobilenet_v1_1.0_224
+ │ └── while_dynamic
+ ├── nnfw_lib_misc_test
+ ├── test_cker
+ ├── test_onert_core
├── test_onert_frontend_nnapi
└── tflite_test
-20 directories, 47 files
+26 directories, 46 files
```
The result of running the inception_v3 model using runtime is as follows. Please consider that this is a test that simply checks execution latency without considering the accuracy of the model.
```
-$ USE_NNAPI=1 ./Product/out/bin/tflite_run ./inception_v3.tflite
-nnapi function 'ANeuralNetworksModel_create' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_addOperand' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_setOperandValue' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_addOperation' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_identifyInputsAndOutputs' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_finish' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksCompilation_create' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksCompilation_finish' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-input tensor indices = [317,]
-nnapi function 'ANeuralNetworksExecution_create' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_setInput' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_setOutput' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_startCompute' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksEvent_wait' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksEvent_free' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksExecution_free' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-output tensor indices = [316(max:905),]
+$ ./Product/out/bin/onert_run --modelfile ./inception_v3.tflite
+Model Filename ./inception_v3.tflite
===================================
MODEL_LOAD takes 1.108 ms
PREPARE takes 0.190 ms
- MIN : 183.895 ms
- GEOMEAN : 183.895 ms
===================================
-nnapi function 'ANeuralNetworksCompilation_free' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
-nnapi function 'ANeuralNetworksModel_free' is loaded from '/home/sjlee/star/one/Product/x86_64-linux.release/out/bin/../lib/libneuralnetworks.so'
```
-Here, `USE_NNAPI=1` means that **ONE** runtime is used for model inference. If omitted, the model will be executed using Tensorflow lite, the basic framework for verification. From the previous build result, you can see that it is the path to the directory where `libneuralnetworks.so` and `libonert_core.so` are located.
+If you use `tflite_run` instead of `onert_run`, the model will be executed using TensorFlow Lite, the baseline framework used for verification. As the previous build result shows, both `tflite_run` and `onert_run` are located in the `./Product/out/bin` directory.
If you come here without any problems, you have all of the basic environments for runtime development.
1. Create a commit
It is time to create a commit for submission once you are convinced that your contribution is
- ready to go. Please include signed-off message at the end of commit message. If not, your pull
- request will be **rejected** by CI.
+ ready to go. Please include
+ [signed-off message](https://github.com/Samsung/ONE/wiki/ONE-Developer's-Certificate-of-Origin)
+ at the end of commit message. If not, your pull request will be **rejected** by CI.
1. Check code format locally
```
$ export ROOTFS_DIR=xxx
...
-$ make all install # do normal build
-$ TARGET_ARCH=aarch64 make all install # do cross build
+$ make # do normal build
+$ TARGET_ARCH=aarch64 make # do cross build
```
### Run test
If you used `ROOTFS_DIR` to prepare in alternative folder, you should also give this to makefile.
```
-$ CROSS_BUILD=1 TARGET_ARCH=armv7l make all install
+$ CROSS_BUILD=1 TARGET_ARCH=armv7l make
# If ROOTFS_DIR is in alternative folder
$ ROOTFS_DIR=/path/to/your/rootfs/arm \
-CROSS_BUILD=1 TARGET_ARCH=armv7l make all install
+CROSS_BUILD=1 TARGET_ARCH=armv7l make
```
You can also omit the `CROSS_BUILD=1` option if you explicitly pass `ROOTFS_DIR`. In that case, if
```
$ export ROOTFS_DIR=xxx
...
-$ make all install # do normal build
-$ TARGET_ARCH=armv7l make all install # do cross build
+$ make # do normal build
+$ TARGET_ARCH=armv7l make # do cross build
```
### Run test
sjlee@odroid's password:
...
$ cd ~/one/Product
-$ ln ${PWD}/armv7l-linux.debug/obj obj
$ ln ${PWD}/armv7l-linux.debug/out out
$ cd ..
$ ls -la Product
drwxrwxr-x 5 sjlee sjlee 4096 Jun 4 20:55 armv7l-linux.debug
-lrwxrwxrwx 1 sjlee sjlee 51 Jun 4 20:54 obj -> /home/sjlee/one/Product/armv7l-linux.debug/obj
lrwxrwxrwx 1 sjlee sjlee 51 Jun 4 20:55 out -> /home/sjlee/one/Product/armv7l-linux.debug/out
```
- Same as `nnfw_set_available_backends`
- Example
```bash
-BACKENDS=cpu ./Product/out/bin/nnpackage_run ...
+BACKENDS=cpu ./Product/out/bin/onert_run ...
```
### 2. OP_BACKEND_[OP_TYPE]
- Example
- Execute `Conv2D` operator on ruy backend and others on cpu backend
```bash
-OP_BACKEND_Conv2D=ruy BACKENDS="cpu;ruy" ./Product/out/bin/nnpackage_run ...
+OP_BACKEND_Conv2D=ruy BACKENDS="cpu;ruy" ./Product/out/bin/onert_run ...
```
### 3. OP_BACKEND_MAP
- Example
- Execute `operator 10` on `acl_cl` backend and others on `acl_neon` backend
```bash
-OP_BACKEND_MAP="10=acl_cl" BACKENDS="acl_neon;acl_cl" ./Product/out/bin/nnpackage_run ...
+OP_BACKEND_MAP="10=acl_cl" BACKENDS="acl_neon;acl_cl" ./Product/out/bin/onert_run ...
```
--- /dev/null
+.. ONE documentation master file, created by
+ sphinx-quickstart on Fri Mar 24 14:03:12 2023.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.22
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.22.0.md
--- /dev/null
+# Release Note 1.22.0
+
+## ONE Compiler
+
+- Introduce new optimization options: `unroll_unidirseqlstm`, `forward_transpose_op`, `fold_fully_connected`, `fuse_prelu`
+- Support more Ops for fake quantization: `Depth2Space`, `Space2Depth`, `Pack`, `Unpack`, `Abs`
+- Support more Ops for quantization: `Abs`, `ReduceProd`
+- Introduce _visq_ tool for quantization error visualization
+- Introduce _Environment_ section into configuration file
+- Improve speed of `convert_nchw_to_nhwc` option
+- Support `Add`, `Mul` of index-type (int32, int64) tensors in _one-quantize_
+- Support Ubuntu 20.04
foreach(retry_count RANGE 5)
message(STATUS "(Trial Count : ${retry_count})")
+ # For external mirror server
+ envoption(EXTERNAL_SERVER_USERPWD "")
file(DOWNLOAD ${URL} "${DOWNLOAD_PATH}"
STATUS status
+ USERPWD "${EXTERNAL_SERVER_USERPWD}"
LOG log)
list(GET status 0 status_code)
if(retry_count EQUAL 5)
message(FATAL_ERROR "Download ${PREFIX} from ${URL} - failed")
endif()
-
+
# Retry after 10 seconds when download fails
execute_process(COMMAND sleep 10)
endforeach()
if("${TARGET_ARCH}" STREQUAL "x86_64")
set(TARGET_ARCH_BASE ${TARGET_ARCH})
+elseif("${TARGET_ARCH}" STREQUAL "armv8-m")
+ set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "armv7-r")
+ set(TARGET_ARCH_BASE "arm")
elseif("${TARGET_ARCH}" STREQUAL "armv7em")
set(TARGET_ARCH_BASE "arm")
elseif("${TARGET_ARCH}" STREQUAL "armv7l")
--- /dev/null
+# Download the CMSIS-NN v4.0.0 source archive and report its location to the caller's scope
+function(_CMSIS_NN_import)
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Default download server and archive URL
+ # NOTE(review): envoption presumably lets the environment override these - confirm in OptionTools
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(CMSIS_NN_4_0_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/CMSIS-NN/archive/refs/tags/v4.0.0.tar.gz)
+
+ # NOTE(review): presumably sets CMSIS_NN_SOURCE_DIR, which is read below - confirm in ExternalSourceTools
+ ExternalSource_Download(CMSIS_NN DIRNAME CMSIS-NN-4.0.0 ${CMSIS_NN_4_0_0_URL})
+
+ # Export the result variables to the scope that included this file
+ set(CMSIS_NNSource_DIR ${CMSIS_NN_SOURCE_DIR} PARENT_SCOPE)
+ set(CMSIS_NNSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_CMSIS_NN_import)
+
+_CMSIS_NN_import()
--- /dev/null
+# Version check for the CMSIS-NN source package: only an exact match with 4.0.0 is accepted
+set(PACKAGE_VERSION "4.0.0")
+# Start as unsuitable; the flags are flipped below only on an exact version match
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
# Note: cmake supports GTest and does not find GTestConfig.cmake or GTest-config.cmake.
# Refer to "https://cmake.org/cmake/help/v3.5/module/FindGTest.html"
# find_package(GTest) creates options like GTEST_FOUND, not GTest_FOUND.
-if(GTEST_FOUND)
- message(STATUS "Found GTest: true")
-else(GTEST_FOUND)
+if(NOT GTEST_FOUND)
message(STATUS "GTEST_FOUND false: call find_package(GTest)")
# Reset package config directory cache to prevent recursive find
unset(GTest_DIR CACHE)
find_package(GTest)
-endif(GTEST_FOUND)
+endif(NOT GTEST_FOUND)
find_package(Threads)
if(${GTEST_FOUND} AND TARGET Threads::Threads)
+++ /dev/null
-# TensorFlowGpuConfig.cmake
-
-function(_Build_TfliteGpuDelagate_)
- nnas_find_package(TensorFlowGpuSource REQUIRED)
- nnas_find_package(AbseilSource REQUIRED)
- nnas_find_package(Farmhash REQUIRED)
- nnas_find_package(Fp16Source REQUIRED)
-
- if(NOT TARGET TensorFlowGpu)
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLiteGpu" TensorFlowLiteGpu)
- endif()
- set(TENSORFLOWGPU_SOURCE_DIR ${TENSORFLOWGPU_SOURCE_DIR} PARENT_SCOPE)
- set(TensorFlowGpu_DIR ${TensorFlowGpu_DIR} PARENT_SCOPE)
-endfunction(_Build_TfliteGpuDelagate_)
-
-if(BUILD_TENSORFLOW_LITE_GPU)
- _Build_TfliteGpuDelagate_()
- set(TensorFlowGpu_FOUND TRUE PARENT_SCOPE)
-else(BUILD_TENSORFLOW_LITE_GPU)
- set(TensorFlowGpu_FOUND FALSE PARENT_SCOPE)
-endif(BUILD_TENSORFLOW_LITE_GPU)
+++ /dev/null
-diff --git a/tensorflow/lite/delegates/gpu/api.h b/tensorflow/lite/delegates/gpu/api.h
-index 7892d0ce..fae4fb69 100644
---- a/tensorflow/lite/delegates/gpu/api.h
-+++ b/tensorflow/lite/delegates/gpu/api.h
-@@ -43,11 +43,18 @@ limitations under the License.
- #include "tensorflow/lite/delegates/gpu/common/data_type.h"
- #include "tensorflow/lite/delegates/gpu/common/status.h"
- #include "tensorflow/lite/delegates/gpu/common/util.h"
-+
-+#ifdef TFLITE_GPU_LIB_FIX
- #include <vulkan/vulkan.h>
-+#endif
-
- #define GL_NO_PROTOTYPES
- #define EGL_NO_PROTOTYPES
-+
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
-+#endif
-+
- #undef GL_NO_PROTOTYPES
- #undef EGL_NO_PROTOTYPES
-
-@@ -80,6 +87,7 @@ enum class ObjectType {
- VULKAN_TEXTURE
- };
-
-+#ifdef TFLITE_GPU_LIB_FIX
- struct OpenGlBuffer {
- OpenGlBuffer() = default;
- explicit OpenGlBuffer(GLuint new_id) : id(new_id) {}
-@@ -95,6 +103,7 @@ struct OpenGlTexture {
- GLuint id = GL_INVALID_INDEX;
- GLenum format = GL_INVALID_ENUM;
- };
-+#endif
-
- struct OpenClBuffer {
- OpenClBuffer() = default;
-@@ -111,6 +120,7 @@ struct OpenClTexture {
- // TODO(akulik): should it specify texture format?
- };
-
-+#ifdef TFLITE_GPU_LIB_FIX
- struct VulkanBuffer {
- VulkanBuffer() = default;
- explicit VulkanBuffer(VkBuffer buffer_, VkDeviceSize size_,
-@@ -143,6 +153,7 @@ struct VulkanMemory {
- VkDeviceSize size;
- VkDeviceSize offset;
- };
-+#endif
-
- struct CpuMemory {
- CpuMemory() = default;
-@@ -228,10 +239,15 @@ bool IsValid(const TensorObjectDef& def);
- // @return the number of elements in a tensor object.
- uint32_t NumElements(const TensorObjectDef& def);
-
-+#ifdef TFLITE_GPU_LIB_FIX
- using TensorObject =
- absl::variant<absl::monostate, OpenGlBuffer, OpenGlTexture, CpuMemory,
- OpenClBuffer, OpenClTexture, VulkanBuffer, VulkanTexture>;
--
-+#else
-+using TensorObject =
-+ absl::variant<absl::monostate, CpuMemory,
-+ OpenClBuffer, OpenClTexture>;
-+#endif
- // @return true if object is set and corresponding values are defined.
- bool IsValid(const TensorObjectDef& def, const TensorObject& object);
-
-diff --git a/tensorflow/lite/delegates/gpu/cl/api.h b/tensorflow/lite/delegates/gpu/cl/api.h
-index 65671117..c339f3f0 100644
---- a/tensorflow/lite/delegates/gpu/cl/api.h
-+++ b/tensorflow/lite/delegates/gpu/cl/api.h
-@@ -20,7 +20,9 @@ limitations under the License.
- #define EGL_NO_PROTOTYPES
- #endif
-
-+#ifdef TFLITE_GPU_LIB_FIX
- #include <EGL/egl.h>
-+#endif
-
- #include <cstdint>
- #include <memory>
-@@ -115,9 +117,10 @@ struct InferenceEnvironmentOptions {
- // It is the error to set egl_display, egl_context AND context at the same
- // time. If egl_display and egl_context are set, they will be used to create
- // GL-aware CL context.
-+#ifdef TFLITE_GPU_LIB_FIX
- EGLDisplay egl_display = EGL_NO_DISPLAY;
- EGLContext egl_context = EGL_NO_CONTEXT;
--
-+#endif //TFLITE_GPU_LIB_FIX
- // Should contain data returned from
- // InferenceEnvironment::GetSerializedBinaryCache method.
- // Invalid or incompatible data will be discarded. Compiled binary may become
-@@ -125,7 +128,11 @@ struct InferenceEnvironmentOptions {
- absl::Span<const uint8_t> serialized_binary_cache;
-
- bool IsGlAware() const {
-+#ifdef TFLITE_GPU_LIB_FIX
- return egl_context != EGL_NO_CONTEXT && egl_display != EGL_NO_DISPLAY;
-+#else //TFLITE_GPU_LIB_FIX
-+ return false;
-+#endif //TFLITE_GPU_LIB_FIX
- }
- };
-
-diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.h b/tensorflow/lite/delegates/gpu/cl/arguments.h
-index a5435c4f..e088355b 100644
---- a/tensorflow/lite/delegates/gpu/cl/arguments.h
-+++ b/tensorflow/lite/delegates/gpu/cl/arguments.h
-@@ -23,7 +23,9 @@ limitations under the License.
- #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
- #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
- #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
-+#endif
- #include "tensorflow/lite/delegates/gpu/cl/util.h"
- #include "tensorflow/lite/delegates/gpu/common/access_type.h"
- #include "tensorflow/lite/delegates/gpu/common/status.h"
-@@ -78,11 +80,12 @@ class Arguments : public ArgumentsBinder {
- ~Arguments() override = default;
-
- private:
-+#ifdef TFLITE_GPU_LIB_FIX
- friend flatbuffers::Offset<data::Arguments> Encode(
- const Arguments& args, flatbuffers::FlatBufferBuilder* builder);
- friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
- Arguments* args);
--
-+#endif
- void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
- void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
- void AddImage2DArray(const std::string& name,
-diff --git a/tensorflow/lite/delegates/gpu/cl/gpu_object.h b/tensorflow/lite/delegates/gpu/cl/gpu_object.h
-index abd77a44..ac1b7f00 100644
---- a/tensorflow/lite/delegates/gpu/cl/gpu_object.h
-+++ b/tensorflow/lite/delegates/gpu/cl/gpu_object.h
-@@ -23,7 +23,9 @@ limitations under the License.
-
- #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
- #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
-+#endif
- #include "tensorflow/lite/delegates/gpu/common/access_type.h"
- #include "tensorflow/lite/delegates/gpu/common/data_type.h"
- #include "tensorflow/lite/delegates/gpu/common/status.h"
-@@ -165,10 +167,12 @@ class GPUObjectDescriptor {
- AccessType GetAccess() const { return access_type_; }
-
- protected:
-+#ifdef TFLITE_GPU_LIB_FIX
- friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode(
- const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
- friend void Decode(const data::GPUObjectDescriptor* fb_obj,
- GPUObjectDescriptor* obj);
-+#endif
- mutable std::map<std::string, std::string> state_vars_;
- AccessType access_type_;
- };
-diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
-index ca0c0319..f3cbc863 100644
---- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
-+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
-@@ -151,6 +151,7 @@ CLNode& CLNode::operator=(CLNode&& node) {
- return *this;
- }
-
-+#ifdef TFLITE_GPU_LIB_FIX
- absl::Status InferenceContext::InitFromGraph(
- const CreateInferenceInfo& create_info, const GraphFloat32& graph,
- Environment* env, std::vector<uint8_t>* serialized_model) {
-@@ -239,6 +240,7 @@ absl::Status InferenceContext::RestoreDeserialized(
- }
- return absl::OkStatus();
- }
-+#endif
-
- absl::Status InferenceContext::InitFromGraphWithTransforms(
- const CreateInferenceInfo& create_info, GraphFloat32* graph,
-diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h
-index ec8055eb..871af9dd 100644
---- a/tensorflow/lite/delegates/gpu/cl/inference_context.h
-+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h
-@@ -31,7 +31,9 @@ limitations under the License.
- #include "tensorflow/lite/delegates/gpu/cl/model_hints.h"
- #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
- #include "tensorflow/lite/delegates/gpu/cl/precision.h"
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
-+#endif
- #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
- #include "tensorflow/lite/delegates/gpu/common/model.h"
- #include "tensorflow/lite/delegates/gpu/common/status.h"
-@@ -100,12 +102,14 @@ class InferenceContext {
- private:
- enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };
-
-+#ifdef TFLITE_GPU_LIB_FIX
- friend flatbuffers::Offset<data::InferenceContext> Encode(
- const InferenceContext& inference,
- flatbuffers::FlatBufferBuilder* builder);
- friend absl::Status Decode(CLContext* context,
- const data::InferenceContext* fb_inference,
- InferenceContext* inference);
-+#endif
-
- void CopyInAndOutIds(const GraphFloat32& graph);
- absl::Status ConvertOperations(const DeviceInfo& device_info,
-diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
-index 57d8690c..8178e2de 100644
---- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
-+++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
-@@ -30,7 +30,9 @@ limitations under the License.
- #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
- #include "tensorflow/lite/delegates/gpu/cl/precision.h"
- #include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
-+#endif
- #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
- #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
- #include "tensorflow/lite/delegates/gpu/common/data_type.h"
-@@ -169,11 +171,12 @@ class GPUOperation {
- bool check_src_channels_size_ = false;
-
- protected:
-+#ifdef TFLITE_GPU_LIB_FIX
- friend flatbuffers::Offset<data::GPUOperation> Encode(
- const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
- friend absl::Status Decode(CLContext* context,
- const data::GPUOperation* fb_op, GPUOperation* op);
--
-+#endif
- virtual absl::Status BindArguments(ArgumentsBinder* args) {
- return absl::OkStatus();
- }
-diff --git a/tensorflow/lite/delegates/gpu/cl/program_cache.cc b/tensorflow/lite/delegates/gpu/cl/program_cache.cc
-index 285aa06d..f636a909 100644
---- a/tensorflow/lite/delegates/gpu/cl/program_cache.cc
-+++ b/tensorflow/lite/delegates/gpu/cl/program_cache.cc
-@@ -18,9 +18,13 @@ limitations under the License.
- #include <cstdint>
- #include <string>
-
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "flatbuffers/flatbuffers.h" // from @flatbuffers
-+#endif
- #include "tensorflow/lite/delegates/gpu/cl/cl_program.h"
-+#ifdef TFLITE_GPU_LIB_FIX
- #include "tensorflow/lite/delegates/gpu/cl/compiled_program_cache_generated.h"
-+#endif
- #include "tensorflow/lite/delegates/gpu/cl/util.h"
- #include "tensorflow/lite/delegates/gpu/common/status.h"
- #include <farmhash.h>
-@@ -82,6 +86,7 @@ absl::Status ProgramCache::GetOrCreateCLKernel(const std::string& code,
- return GetOrCreateCLKernel(code, function_name, {}, context, device, result);
- }
-
-+#ifdef TFLITE_GPU_LIB_FIX
- absl::Status ProgramCache::AddSerializedCache(
- const CLContext& context, const CLDevice& device,
- absl::Span<const uint8_t> serialized_cache) {
-@@ -143,6 +148,7 @@ absl::Status ProgramCache::GetSerializedCache(
- builder.GetSize());
- return absl::OkStatus();
- }
-+#endif
-
- } // namespace cl
- } // namespace gpu
-diff --git a/tensorflow/lite/delegates/gpu/common/types.h b/tensorflow/lite/delegates/gpu/common/types.h
-index 4ddb46f3..2b692f0b 100644
---- a/tensorflow/lite/delegates/gpu/common/types.h
-+++ b/tensorflow/lite/delegates/gpu/common/types.h
-@@ -34,9 +34,9 @@ class alignas(2) half {
- HalfBits bits;
-
- half() = default;
--
-+#ifdef TFLITE_GPU_LIB_FIX
- half(const half& f) : bits(f.bits) {}
--
-+#endif
- explicit half(float other) { bits = fp16_ieee_from_fp32_value(other); }
-
- void operator=(float f) { *this = half(f); }
+++ /dev/null
-#
-# Download Tensorflow 2.4.1, use gpu delegate codes only
-#
-
-function(_TensorFlowGpuSource_Import)
- SET(PATCH_FILE_CHECK "20211014")
- SET(DATE_STAMP_PATH "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU.stamp")
-
- set(PATCH_DONE FALSE)
- if(EXISTS ${DATE_STAMP_PATH})
- file(STRINGS ${DATE_STAMP_PATH} OBTAINED_CONTENT)
- if(${OBTAINED_CONTENT} STREQUAL "${PATCH_FILE_CHECK}")
- set(PATCH_DONE "TRUE")
- endif()
- endif()
-
- if(${PATCH_DONE} STREQUAL "TRUE")
- message(STATUS "Skip downloading TensorFlowGpuSource")
- set(TENSORFLOWGPU_SOURCE_DIR "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU" PARENT_SCOPE)
- set(TensorFlowGpuSource_DIR "${TensorFlowGpuSource_DIR}" PARENT_SCOPE)
- set(TensorFlowGpuSource_FOUND TRUE PARENT_SCOPE)
- return()
- else(${PATCH_DONE} STREQUAL "TRUE")
- # PATCH_DONE FALSE
- message(STATUS "TensorFlowGpuSource patch not found!")
- endif(${PATCH_DONE} STREQUAL "TRUE")
-
- # Download TFLite Source Code
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
- envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- envoption(TENSORFLOW_2_4_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.4.1.tar.gz)
- ExternalSource_Download(TFLITE_GPU_DELEGATE DIRNAME TENSORFLOW-2.4.1 ${TENSORFLOW_2_4_1_URL})
-
- # Patch for non used codes on onert backend/gpu_cl
- # ToDo: Do it more simpler
- set(TENSORFLOWGPU_SOURCE_DIR "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU")
-
- # remove & copy gpu delegate source codes only
- if(EXISTS ${TENSORFLOWGPU_SOURCE_DIR})
- file(REMOVE_RECURSE "${TENSORFLOWGPU_SOURCE_DIR}")
- endif()
-
- file(MAKE_DIRECTORY "${TENSORFLOWGPU_SOURCE_DIR}")
- execute_process(
- WORKING_DIRECTORY "${TFLITE_GPU_DELEGATE_SOURCE_DIR}"
- COMMAND bash -c "cp -r --parents ./tensorflow/lite/delegates/gpu ../TENSORFLOW_GPU"
- )
-
- # Create Stamp
- set(_remove_path "${TENSORFLOWGPU_SOURCE_DIR}.stamp")
- if(EXISTS ${_remove_path})
- file(REMOVE ${_remove_path})
- endif()
- execute_process(
- WORKING_DIRECTORY "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU"
- COMMAND bash -c "patch -p1 < ${CMAKE_CURRENT_LIST_DIR}/TensorFlowGpuSource/patch_for_gpu_cl_build.patch"
- )
- file(WRITE ${DATE_STAMP_PATH} "${PATCH_FILE_CHECK}")
- set(TENSORFLOWGPU_SOURCE_DIR "${TENSORFLOWGPU_SOURCE_DIR}" PARENT_SCOPE)
- set(TensorFlowGpuSource_DIR "${TensorFlowGpuSource_DIR}" PARENT_SCOPE)
- set(TensorFlowGpuSource_FOUND TRUE PARENT_SCOPE)
-
- execute_process(
- WORKING_DIRECTORY "${NNAS_EXTERNALS_DIR}"
- COMMAND bash -c "rm -rf ${TFLITE_GPU_DELEGATE_SOURCE_DIR}.stamp"
- COMMAND bash -c "rm -rf ${TFLITE_GPU_DELEGATE_SOURCE_DIR}"
- )
-endfunction(_TensorFlowGpuSource_Import)
-
-if(NOT TensorFlowGpuSource_FOUND)
- _TensorFlowGpuSource_Import()
-else()
- set(TensorFlowGpuSource_FOUND FALSE PARENT_SCOPE)
-endif(NOT TensorFlowGpuSource_FOUND)
+++ /dev/null
-#
-# Tensorflow Lite GPU delegate library 2.4.1
-#
-
-set(LIB_TENSORFLOW_GPU_DELEGATE "TensorFlowGpu")
-
-#TENSORFLOWGPU_SOURCE_DIR
-set(REF_TENSORFLOW_SRC_BASE ${TENSORFLOWGPU_SOURCE_DIR})
-set(REF_TENSORFLOW_LITE_SRC_BASE ${REF_TENSORFLOW_SRC_BASE}/tensorflow/lite)
-set(REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE "${REF_TENSORFLOW_LITE_SRC_BASE}/delegates/gpu")
-
-set(SRC_BASE "${REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE}")
-file(GLOB GPU_CL_SRC_LIST "${SRC_BASE}/cl/*.cc"
- "${SRC_BASE}/cl/kernels/*.cc"
- "${SRC_BASE}/cl/kernels/special/*.cc"
- "${SRC_BASE}/cl/kernels/selectors/*.cc"
- "${SRC_BASE}/cl/selectors/*.cc"
- "${SRC_BASE}/common/*.cc"
-# Available, but not needed yet
-# "${SRC_BASE}/common/default/*.cc"
-# "${SRC_BASE}/common/memory_managements/*.cc"
-# "${SRC_BASE}/common/transformations/*.cc"
- )
-
-file(GLOB GPU_CL_HDRS_GLOB "${SRC_BASE}/cl/*.h"
- "${SRC_BASE}/cl/kernels/*.h"
- "${SRC_BASE}/cl/kernels/special/*.h"
- "${SRC_BASE}/cl/kernels/selectors/*.h"
- "${SRC_BASE}/cl/selectors/*.h"
- "${SRC_BASE}/common/*.h"
- "${SRC_BASE}/common/default/*.h"
- "${SRC_BASE}/common/memory_managements/*.h"
- "${SRC_BASE}/common/transformations/*.h"
- )
-list(APPEND GPU_CL_SRC_LIST "${_GPU_CL_HDRS_GLOB}")
-
-file(GLOB REMOVE_TEST_SRCS "${SRC_BASE}/cl/*_test*.cc"
- "${SRC_BASE}/cl/testing/*.cc"
- "${SRC_BASE}/cl/kernels/*_test*.cc"
- "${SRC_BASE}/common/*_test*.cc"
- "${SRC_BASE}/common/transformations/*_test*.cc"
- )
-# Not available
-file(GLOB REMOVE_SRCS "${SRC_BASE}/cl/*gl*.cc"
- "${SRC_BASE}/cl/gpu_api_delegate.cc"
- "${SRC_BASE}/cl/serialization.cc"
- "${SRC_BASE}/common/lstm_parser.cc"
- "${SRC_BASE}/common/model_builder.cc"
- "${SRC_BASE}/common/model_builder_helper.cc"
- "${SRC_BASE}/common/object_reader.cc"
- "${SRC_BASE}/common/quantization_util.cc"
- "${SRC_BASE}/common/memory_management/*_test.cc"
- )
-
-list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_TEST_SRCS})
-list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_SRCS})
-list(APPEND TFLITE_GPU_SRCS ${GPU_CL_SRC_LIST})
-
-add_library(${LIB_TENSORFLOW_GPU_DELEGATE} STATIC ${TFLITE_GPU_SRCS})
-target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Fp16Source_DIR}/include")
-target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TENSORFLOWGPU_SOURCE_DIR}")
-target_link_libraries(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE abseil farmhash fp16)
-
-add_library(tflitegpu_ignore_warnings INTERFACE)
-target_compile_options(tflitegpu_ignore_warnings INTERFACE -Wno-unused-parameter -Wno-sign-compare)
-target_link_libraries(${LIB_TENSORFLOW_GPU_DELEGATE} INTERFACE tflitegpu_ignore_warnings)
-
-# GL codes are not used on gpu_cl
-target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "-DCL_DELEGATE_NO_GL")
-
-# Applying PIC first, currently used on gpu_cl only
-set_target_properties(${LIB_TENSORFLOW_GPU_DELEGATE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
FILES_TO_CHECK_PYTHON=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '\.py$'`)
# Exceptional case: one-cmds don't have '.py' extension: ignore non-python source (cmake, etc) and ignore shell script: one-prepare-venv
FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/one-cmds/[^(\./)]*$' | egrep -v '^compiler/one-cmds/one-prepare-venv$'`)
+ # Exceptional case: onecc-docker doesn't have a '.py' extension.
+ FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/onecc-docker/onecc-docker$'`)
+ # Exceptional case: visq doesn't have a '.py' extension.
+ FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/visq/visq$'`)
for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do
skip=${s#'.'/}/
"${LCOV_PATH}" -e "${RAW_COVERAGE_INFO_PATH}" -o "${EXTRACTED_COVERAGE_INFO_PATH}" \
"${CANDIDATES[@]}"
-
-opencl_files=($(find ./runtime/onert/backend/gpu_cl/open_cl/ \( -name "*.cc" -o -name "*.h" \) -exec realpath {} \; ))
-
# Exclude test files from coverage report
# Exclude flatbuffer generated files from coverage report
"${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
- '*.test.cpp' '*.test.cc' '*/test/*' '*/tests/*' '*_schema_generated.h' "${opencl_files[@]}"
+ '*.test.cpp' '*.test.cc' '*/test/*' '*/tests/*' '*_schema_generated.h'
# Final coverage data
cp -v ${EXCLUDED_COVERAGE_INFO_PATH} ${COVERAGE_INFO_PATH}
+one (1.22.0) bionic; urgency=medium
+
+ * Introduce new optimization options: `unroll_unidirseqlstm`, `forward_transpose_op`, `fold_fully_connected`, `fuse_prelu`
+ * Support more Ops for fake quantization: `Depth2Space`, `Space2Depth`, `Pack`, `Unpack`, `Abs`
+ * Support more Ops for quantization: `Abs`, `ReduceProd`
+ * Introduce _visq_ tool for quantization error visualization
+ * Introduce _Environment_ section into configuration file
+ * Improve speed of `convert_nchw_to_nhwc` option
+ * Support `Add`, `Mul` of index-type (int32, int64) tensors in _one-quantize_
+ * Support ubuntu 20.04
+
+ -- seongwoo <mhs4670go@naver.com> Fri, 24 Mar 2023 13:58:16 +0900
+
one (1.21.0) bionic; urgency=medium
* Support unrolling of LSTM and RNN Ops in `one-import-onnx` tool
# {FILES_TO_INSTALL} {DEST_DIR}
# bin
usr/bin/circledump usr/share/one/bin/
+usr/bin/circle-opselector usr/share/one/bin/
usr/bin/circle-tensordump usr/share/one/bin/
usr/bin/tflchef usr/share/one/bin/
usr/bin/tflchef-file usr/share/one/bin/
# bin
usr/bin/circle2circle usr/share/one/bin/
usr/bin/circle-eval-diff usr/share/one/bin/
+usr/bin/circle-interpreter usr/share/one/bin/
usr/bin/circle-operator usr/share/one/bin/
usr/bin/circle-partitioner usr/share/one/bin/
usr/bin/circle-quantizer usr/share/one/bin/
+usr/bin/dalgona usr/share/one/bin/
usr/bin/generate_bcq_metadata.py usr/share/one/bin/
usr/bin/generate_bcq_output_arrays.py usr/share/one/bin/
-usr/bin/model2nnpkg.sh usr/share/one/bin/
+usr/bin/model2nnpkg usr/share/one/bin/
usr/bin/onecc usr/share/one/bin/
usr/bin/onecc.template.cfg usr/share/one/bin/
usr/bin/one-build usr/share/one/bin/
usr/bin/onelib/OptionBuilder.py usr/share/one/bin/onelib/
usr/bin/onelib/TopologicalSortHelper.py usr/share/one/bin/onelib/
usr/bin/onelib/WorkflowRunner.py usr/share/one/bin/onelib/
+usr/bin/onelib/Command.py usr/share/one/bin/onelib/
+usr/bin/onelib/utils.py usr/share/one/bin/onelib/
+usr/bin/onelib/export_constant.py usr/share/one/bin/onelib/
usr/bin/onnx_legalizer.py usr/share/one/bin/
usr/bin/rawdata2hdf5 usr/share/one/bin/
usr/bin/record-minmax usr/share/one/bin/
usr/bin/tf2nnpkg usr/share/one/bin/
usr/bin/tf2tfliteV2.py usr/share/one/bin/
usr/bin/tflite2circle usr/share/one/bin/
-usr/bin/utils.py usr/share/one/bin/
+usr/bin/visq usr/share/one/bin/
+usr/bin/visqlib/DumpFakeQuantFM.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/DumpFP32FM.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/Palette.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/QErrorComputer.py usr/share/one/bin/visqlib/
+usr/bin/visqlib/Util.py usr/share/one/bin/visqlib/
# lib
usr/lib/* usr/share/one/lib/
# doc
usr/doc/* usr/share/one/doc/
+# optimization
+usr/optimization/* usr/share/one/optimization/
#!/usr/bin/make -f
export DH_VERBOSE = 1
export NNAS_BUILD_PREFIX = build
-export PRESET = 20220323
+export PRESET = 20221125
export _DESTDIR = debian/tmp/usr
%:
cmake --build "$(NNAS_BUILD_PREFIX)/nncc" -- install
override_dh_install:
- install -t "$(_DESTDIR)/bin" -D "tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
install -T -m 755 -D "infra/packaging/res/tf2nnpkg.${PRESET}" "$(_DESTDIR)/bin/tf2nnpkg"
dh_install
-override_dh_builddeb:
- dh_builddeb --destdir=$(NNAS_BUILD_PREFIX)
export DH_VERBOSE = 1
export _DESTDIR = debian/tmp/
export BUILD_TYPE=release
-export OPTIONS=-DBUILD_LOGGING=0 -DBUILD_TFLITE_COMPARATOR_TEST_TOOL=0 -DBUILD_NNPACKAGE_RUN=0 -DBUILD_TFLITE_RUN=0 -DBUILD_NNAPI_TEST=0 -DBUILD_RUNTIME_NNAPI_TEST=0 -DBUILD_TFLITE_BENCHMARK_MODEL=0 -DBUILD_TFLITE_VANILLA_RUN=0 -DBUILD_TENSORFLOW_LITE_2_8_0=0 -DBUILD_TENSORFLOW_LITE=0
+export OPTIONS=-DBUILD_LOGGING=0 -DBUILD_TFLITE_COMPARATOR_TEST_TOOL=0 -DBUILD_ONERT_RUN=0 -DBUILD_TFLITE_RUN=0 -DBUILD_RUNTIME_NNAPI_TEST=0 -DBUILD_TFLITE_VANILLA_RUN=0 -DBUILD_TENSORFLOW_LITE_2_8_0=0 -DBUILD_TENSORFLOW_LITE=0
export DEBIAN_BUILD=1
export INSTALL_PATH=debian/tmp/usr/
%:
# Additional tools
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive \
- apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
-RUN apt-get update && apt-get -qqy install python3.8 python3.8-venv
+ apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
+RUN apt-get update && apt-get -qqy install python3.8 python3.8-venv python3.8-dev
RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install yapf==0.22.0 numpy
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
RUN python3.8 -m pip install --upgrade pip
-RUN python3.8 -m pip install numpy
+RUN python3.8 -m pip install numpy flatbuffers
# Install google test (source)
RUN apt-get update && apt-get -qqy install libgtest-dev
# setup adb server
EXPOSE 5037
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
# Clean archives (to reduce image size)
RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
# Additional tools
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive \
- apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
+ apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install yapf==0.22.0 numpy
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
# Install google test (source)
RUN apt-get update && apt-get -qqy install libgtest-dev
RUN unzip -d tmp sdb.zip && rm sdb.zip
RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp/*
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
# Clean archives (to reduce image size)
RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
--- /dev/null
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:jammy
+
+ARG UBUNTU_MIRROR
+
+# Install 'add-apt-repository'
+RUN apt-get update && apt-get -qqy install software-properties-common
+
+# Build tool
+RUN apt-get update && apt-get -qqy install build-essential cmake scons git lcov g++-arm-linux-gnueabihf g++-aarch64-linux-gnu
+
+# Debian build tool
+RUN apt-get update && apt-get -qqy install fakeroot devscripts debhelper python3-all dh-python
+
+# Install extra dependencies (Caffe, nnkit)
+RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoogle-glog-dev libatlas-base-dev libhdf5-dev
+
+# Install protocol buffer
+RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
+
+# Additional tools
+# TODO install clang-format (No official clang-format-8 package for ubuntu jammy)
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive \
+ apt-get -qqy install doxygen graphviz wget zip unzip python3 python3-pip python3-venv python3-dev hdf5-tools pylint curl
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy flatbuffers
+
+# Install google test (source)
+RUN apt-get update && apt-get -qqy install libgtest-dev
+
+# TODO: Install gbs & sdb
+# gbs & sdb do not support ubuntu jammy yet
+
+# Setup user to match host user, and give superuser permissions
+ARG USER_ID=1000
+ARG GROUP_ID=${USER_ID}
+RUN apt-get update && apt-get -qqy install sudo
+RUN addgroup --gid ${GROUP_ID} ubuntu && adduser --disabled-password --gecos '' --uid ${USER_ID} --gid ${GROUP_ID} ubuntu && usermod -aG sudo ubuntu
+RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+RUN echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
+# Clean archives (to reduce image size)
+RUN apt-get clean -y
+
+# Set user to the one we just created
+USER ${USER_ID}
# title of most generated pages and in a few other places.
# The default value is: My Project.
-PROJECT_NAME = nnas
+PROJECT_NAME = "ONE - On-device Neural Engine"
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
# members will be omitted, etc.
# The default value is: NO.
-OPTIMIZE_OUTPUT_FOR_C = NO
+OPTIMIZE_OUTPUT_FOR_C = YES
# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
# Python sources only. Doxygen will then generate output that is more tailored
# list. This list is created by putting \todo commands in the documentation.
# The default value is: YES.
-GENERATE_TODOLIST = YES
+GENERATE_TODOLIST = NO
# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
# list. This list is created by putting \test commands in the documentation.
# The default value is: YES.
-GENERATE_TESTLIST = YES
+GENERATE_TESTLIST = NO
# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
# list. This list is created by putting \bug commands in the documentation.
# the documentation.
# The default value is: YES.
-GENERATE_DEPRECATEDLIST= YES
+GENERATE_DEPRECATEDLIST= NO
# The ENABLED_SECTIONS tag can be used to enable conditional documentation
# sections, marked by \if <section_label> ... \endif and \cond <section_label>
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.
-INPUT =
+INPUT = README.md \
+ docs/howto/ \
+ docs/overview/ \
+ docs/runtime/ \
+ compute/ \
+ compiler/ \
+ onert-micro/ \
+ runtime/
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
# Note that relative paths are relative to the directory from which doxygen is
# run.
-EXCLUDE = Product/ \
- build/ \
- doxygen/ \
- report/ \
- externals/ \
- packaging/ \
- runtimes/contrib/ \
- runtimes/pure_arm_compute/ \
- tests/ \
- tools/
+EXCLUDE =
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix file system feature) are excluded
# from the input.
# The default value is: NO.
-EXCLUDE_SYMLINKS = NO
+EXCLUDE_SYMLINKS = YES
# If the value of the INPUT tag contains directories, you can use the
# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
# Note that the wildcards are matched against the file with absolute path, so to
# exclude all test directories for example use the pattern */test/*
-EXCLUDE_PATTERNS =
+EXCLUDE_PATTERNS = *.test.* \
+ */test/* \
+ */tests/* \
+ */unittest/* \
+ *_generated.* \
+ */3rdparty/* \
+ */contrib/* \
+ */compiler/*/*.md \
+ */compute/*/*.md \
+ */runtime/*/*.md
+
# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
# (namespaces, classes, functions, etc.) that should be excluded from the
# (index.html). This can be useful if you have a project on for instance GitHub
# and want to reuse the introduction page also for the doxygen output.
-USE_MDFILE_AS_MAINPAGE = docs/nnfw/roadmap.md
+USE_MDFILE_AS_MAINPAGE = README.md
#---------------------------------------------------------------------------
# Configuration options related to source browsing
# classes and enums directly into the documentation.
# The default value is: NO.
-INLINE_SOURCES = NO
+INLINE_SOURCES = YES
# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
# special comment blocks from generated source code fragments. Normal C, C++ and
# function all documented functions referencing it will be listed.
# The default value is: NO.
-REFERENCED_BY_RELATION = NO
+REFERENCED_BY_RELATION = YES
# If the REFERENCES_RELATION tag is set to YES then for each documented function
# all documented entities called/used by that function will be listed.
# The default value is: NO.
-REFERENCES_RELATION = NO
+REFERENCES_RELATION = YES
# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
# to YES then the hyperlinks from functions in REFERENCES_RELATION and
# The default value is: YES.
# This tag requires that the tag HAVE_DOT is set to YES.
-CLASS_GRAPH = YES
+CLASS_GRAPH = NO
# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
# graph for each documented class showing the direct and indirect implementation
# The default value is: NO.
# This tag requires that the tag HAVE_DOT is set to YES.
-TEMPLATE_RELATIONS = NO
+TEMPLATE_RELATIONS = YES
# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
# YES then doxygen will generate a graph for each documented file showing the
# The default value is: YES.
# This tag requires that the tag HAVE_DOT is set to YES.
-INCLUDE_GRAPH = YES
+INCLUDE_GRAPH = NO
# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
# set to YES then doxygen will generate a graph for each documented file showing
# The default value is: YES.
# This tag requires that the tag HAVE_DOT is set to YES.
-INCLUDED_BY_GRAPH = YES
+INCLUDED_BY_GRAPH = NO
# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
# dependency graph for every global function or class method.
# The default value is: NO.
# This tag requires that the tag HAVE_DOT is set to YES.
-CALL_GRAPH = YES
+CALL_GRAPH = NO
# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
# dependency graph for every global function or class method.
# The default value is: NO.
# This tag requires that the tag HAVE_DOT is set to YES.
-CALLER_GRAPH = YES
+CALLER_GRAPH = NO
# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will graphical
# hierarchy of all classes instead of a textual one.
# found. If left blank, it is assumed the dot tool can be found in the path.
# This tag requires that the tag HAVE_DOT is set to YES.
-DOT_PATH = /usr/local/bin/dot
+DOT_PATH =
# The DOTFILE_DIRS tag can be used to specify one or more directories that
# contain dot files that are included in the documentation (see the \dotfile
# Minimum value: 0, maximum value: 10000, default value: 50.
# This tag requires that the tag HAVE_DOT is set to YES.
-DOT_GRAPH_MAX_NODES = 50
+DOT_GRAPH_MAX_NODES = 500
# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
# generated by dot. A depth value of 3 means that only nodes reachable from the
-# The libboost 1.74 uses IN_LIST operator, which requires the policy CMP0057, in a CMake file.
-# This policy requires ``cmake_minimum_required(VERSION 3.3)``.
-# Run "cmake --help-policy CMP0057" for policy details.
-cmake_minimum_required(VERSION 3.3)
+cmake_minimum_required(VERSION 3.10)
project(nncc)
set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
-# This feature works with CMake 3.5.2 or later. However, using previous versions does not produce
-# an error. We are still officially using CMake 3.1.0, but put this code for the sake of semantic
-# support in various development tools.
-# Todo: Someday, CMake needs to be updated to 3.7.2 or later to take advantage of improvements
-# such as `cmake-server`.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(NNAS_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." CACHE
#
BUILD_TYPE?=Debug
+BUILD_JOBS?=1
CURRENT_DIR=$(shell pwd)
BUILDFOLDER=build
ARM32_BUILD_ITEMS+=;loco;locop;logo-core;logo
ARM32_BUILD_ITEMS+=;safemain;mio-circle04;mio-tflite280
ARM32_BUILD_ITEMS+=;dio-hdf5
+ARM32_BUILD_ITEMS+=;luci-compute
ARM32_BUILD_ITEMS+=;foder;circle-verify;souschef;arser;vconone
ARM32_BUILD_ITEMS+=;luci
ARM32_BUILD_ITEMS+=;luci-interpreter
ARM32_HOST_ITEMS+=;hermes;hermes-std
ARM32_HOST_ITEMS+=;loco;locop;logo-core;logo
ARM32_HOST_ITEMS+=;safemain;mio-circle04;mio-tflite280
+ARM32_HOST_ITEMS+=;luci-compute
ARM32_HOST_ITEMS+=;foder;circle-verify;souschef;arser;vconone
ARM32_HOST_ITEMS+=;luci
ARM32_HOST_ITEMS+=;luci-interpreter
ARM32_HOST_ITEMS+=;luci-eval-driver;luci-value-test
-_SPACE_:=
-_SPACE_+=
+_EMPTY_:=
+_SPACE_:=$(_EMPTY_) $(_EMPTY_)
ARM32_BUILD_WHITELIST=$(subst $(_SPACE_),,$(ARM32_BUILD_ITEMS))
ARM32_HOST_WHITELIST=$(subst $(_SPACE_),,$(ARM32_HOST_ITEMS))
-NNCC_CFG_OPTION+= -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_COVERAGE=OFF
+NNCC_CFG_OPTION+= -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_COVERAGE=OFF -DEXTERNALS_BUILD_THREADS=$(BUILD_JOBS)
NNCC_CFG_STRICT= -DENABLE_STRICT_BUILD=ON
$(NNCC_ARM32_DEBUG) $(NNCC_CFG_STRICT) \
-DCMAKE_TOOLCHAIN_FILE=$(ARM32_TOOLCHAIN_FILE) \
-DCMAKE_INSTALL_PREFIX="$(ARM32_INSTALL_FOLDER)" \
+ -DBUILD_ARM32_NEON=ON \
-DENABLE_TEST=ON
+# TODO remove BUILD_ARM32_NEON=ON after a while, as the default is ON.
+# It is explicitly added to prevent using a cached 'BUILD_ARM32_NEON=OFF'.
#
# builds
#
int_build_arm32_host:
- NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc build -j1
+ NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc build -j$(BUILD_JOBS)
int_build_arm32:
ROOTFS_DIR=$(ROOTFS_ARM) TARGET_ARCH=armv7l \
BUILD_HOST_EXEC=$(CURRENT_DIR)/$(BUILD_ARM32_HOST) \
- NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc build -j1
+ NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc build -j$(BUILD_JOBS)
#
# host test; run test in host to generate random input and expected outputs
# addition for arm-linux
set(FLAGS_COMMON ${FLAGS_COMMON}
- "-mcpu=cortex-a7"
+ "-march=armv7-a"
+ "-mtune=cortex-a8"
"-mfloat-abi=hard"
- "-ftree-vectorize"
"-mfp16-format=ieee"
)
if(BUILD_ARM32_NEON)
set(FLAGS_COMMON ${FLAGS_COMMON}
- "-mfpu=neon-vfpv4"
+ "-mfpu=vfpv3-d16"
+ "-ftree-vectorize"
)
else(BUILD_ARM32_NEON)
message(STATUS "ARMv7l: NEON is disabled")
export GIT_SSL_NO_VERIFY=1
-DOCKER_VOLUMES=" -v $HOST_PATH:$DOCKER_PATH"
+DOCKER_VOLUMES+=" -v $HOST_PATH:$DOCKER_PATH"
DOCKER_ENV_VARS+=" -e http_proxy"
DOCKER_ENV_VARS+=" -e no_proxy"
-cmake_minimum_required(VERSION 3.5.1)
+cmake_minimum_required(VERSION 3.10)
project(nnfw)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_EXTENSIONS OFF)
-# This feature works with CMake 3.5.2 or later. However, using previous versions does not produce
-# an error. We are still officially using CMake 3.5.1, but put this code for the sake of semantic
-# support in various development tools.
-# Todo: Someday, CMake needs to be updated to 3.7.2 or later to take advantage of improvements
-# such as `cmake-server`.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# identify platform: HOST_PLATFORM, TARGET_PLATFORM and related
option(BUILD_RUNTIME_NNFW_API_TEST "Build Runtime NNFW API Tests" ON)
option(BUILD_TFLITE_RUN "Build tflite-run" ON)
option(BUILD_TFLITE_VANILLA_RUN "Build tflite-vanilla-run" OFF)
-option(BUILD_TFLITE_BENCHMARK_MODEL "Build tflite benchmark model" OFF)
-option(BUILD_NNAPI_TEST "Build nnapi_test" ON)
-option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
+option(BUILD_ONERT_RUN "Build onert_run" ON)
option(BUILD_TFLITE_LOADER "Build TensorFlow Lite loader" ON)
option(BUILD_CIRCLE_LOADER "Build circle loader" ON)
option(BUILD_TRIX_LOADER "Build trix loader" ON)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
option(INSTALL_TEST_SCRIPTS "Install test scripts" ON)
option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
-option(BUILD_NPUD "Build NPU daemon" ON)
-option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" ON)
+option(BUILD_NPUD "Build NPU daemon" OFF)
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
#
# Default build configuration for contrib
#
-option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" OFF)
option(BUILD_ANDROID_BENCHMARK_APP "Enable Android Benchmark App" OFF)
option(BUILD_BENCHMARK_ACL "Build ARM Compute Library Benchmarks" OFF)
option(BUILD_DETECTION_APP "Build detection example app" OFF)
# Default external libraries source download and build configuration
#
option(DOWNLOAD_TENSORFLOW "Download Tensorflow source" ON)
-option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" OFF)
option(DOWNLOAD_ABSEIL "Download Abseil source" ON)
option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" OFF)
option(DOWNLOAD_EIGEN "Download Eigen source" ON)
option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" ON)
option(BUILD_BOOST "Build boost source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON)
-option(BUILD_TENSORFLOW_LITE_2_8_0 "Build TensorFlow Lite 2.8.0 from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" OFF)
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" ON)
option(DEBUG_ARMCOMPUTE "Build ARM Compute as debug type" OFF)
# aarch64 android cmake options
#
-# NOTE BUILD_ANDROID_TFLITE(JNI lib) is disabled due to BuiltinOpResolver issue.
-# tensorflow-lite does not build BuiltinOpResolver but JNI lib need it
-# Related Issue : #1403
-option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" OFF)
option(BUILD_ANDROID_BENCHMARK_APP "Enable Android Benchmark App" ON)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
# Need boost library
option(DOWNLOAD_BOOST "Download boost source" ON)
option(BUILD_BOOST "Build boost source" ON)
option(BUILD_LOGGING "Build logging runtime" OFF)
-# Do not support npud
-option(BUILD_NPUD "Build NPU daemon" OFF)
+
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+# Do not download the ARM Compute source (BUILD_ARMCOMPUTE is OFF as well).
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow to use CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+# Do not download the ARM Compute source (BUILD_ARMCOMPUTE is OFF as well).
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
-option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
+
option(BUILD_GPU_CL "Build gpu_cl backend" ON)
option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow to use CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(BUILD_OPENCL_TOOL "Build OpenCL tool" ON)
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
-option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
option(BUILD_GPU_CL "Build gpu_cl backend" ON)
option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+# Do not download the ARM Compute source (BUILD_ARMCOMPUTE is OFF as well).
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
-option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
-option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
+option(DOWNLOAD_OPENGL_HEADERS "Download Opengl_headers source" ON)
+option(DOWNLOAD_EGL_HEADERS "Download Egl_headers source" ON)
+option(DOWNLOAD_VULKAN "Download vulkan source" ON)
+
option(BUILD_GPU_CL "Build gpu_cl backend" ON)
option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow to use CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
option(BUILD_XNNPACK "Build XNNPACK" OFF)
option(DOWNLOAD_OPENCL_HEADERS "Download opencl headers" OFF)
+
+option(BUILD_NPUD "Build NPU daemon" ON)
+# Do not allow to use CONFIG option on Tizen
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
message(STATUS "Search acl in ${ARMCompute_LIB_SEARCH_PATHS}")
- if(NOT INCLUDE_DIR)
+ # ARMCompute v21.02 moves some headers into "src/".
+ # And we cannot build armcompute-ex library without these headers.
+ # So we need to download and use source code if our build root doesn't have headers in "src/" (tizen's devel package includes these headers).
+ # TODO Don't use headers in "src/"
+ find_path(HEADER_SRC_DIR NAMES src/core/CL/ICLKernel.h PATHS ${ARMCompute_INCLUDE_SEARCH_PATHS})
+ if(NOT INCLUDE_DIR OR NOT HEADER_SRC_DIR)
nnas_find_package(ARMComputeSource QUIET)
if (NOT ARMComputeSource_FOUND)
set(ARMCompute_FOUND FALSE PARENT_SCOPE)
return()
endif()
- set(INCLUDE_DIR ${ARMComputeSource_DIR} ${ARMComputeSource_DIR}/include)
- endif(NOT INCLUDE_DIR)
+
+ # Clean if INCLUDE_DIR is NOT_FOUND
+ if(NOT INCLUDE_DIR)
+ unset(INCLUDE_DIR)
+ endif(NOT INCLUDE_DIR)
+
+ list(APPEND INCLUDE_DIR ${ARMComputeSource_DIR} ${ARMComputeSource_DIR}/include)
+ endif(NOT INCLUDE_DIR OR NOT HEADER_SRC_DIR)
if(NOT CORE_LIBRARY)
set(ARMCompute_FOUND FALSE PARENT_SCOPE)
function(_Eigen_import)
- nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.8.0 QUIET)
if(NOT TensorFlowEigenSource_FOUND)
set(Eigen_FOUND FALSE PARENT_SCOPE)
--- /dev/null
+# Locate gobject-2.0 and publish GOBJECT2.0_FOUND, GOBJECT2.0_INCLUDE_DIRS and
+# GOBJECT2.0_LIBRARIES to the scope that calls the helper below.
+function(_GOBJECT_2_0_import)
+  nnfw_find_package(GLib2.0 REQUIRED)
+
+  find_library(GOBJECT_LIBRARIES NAMES gobject-2.0)
+
+  # gobject-2.0 depends on glib-2.0 and its headers are reached through the
+  # glib-2.0 include directories, so those are reused as-is.
+  set(GOBJECT_INCLUDE_DIRS ${GLIB2.0_INCLUDE_DIRS})
+
+  # The package counts as found only when both the library and the include
+  # directories resolved.
+  if(GOBJECT_LIBRARIES AND GOBJECT_INCLUDE_DIRS)
+    set(GOBJECT_FOUND TRUE)
+  else()
+    set(GOBJECT_FOUND FALSE)
+    message(STATUS "Failed to find gobject-2.0")
+  endif()
+
+  # Export the results under the package-style names.
+  set(GOBJECT2.0_FOUND ${GOBJECT_FOUND} PARENT_SCOPE)
+  set(GOBJECT2.0_INCLUDE_DIRS ${GOBJECT_INCLUDE_DIRS} PARENT_SCOPE)
+  set(GOBJECT2.0_LIBRARIES ${GOBJECT_LIBRARIES} PARENT_SCOPE)
+endfunction(_GOBJECT_2_0_import)
+
+_GOBJECT_2_0_import()
add_library(gmock INTERFACE)
target_include_directories(gmock INTERFACE ${GMOCK_INCLUDE_DIR})
target_link_libraries(gmock INTERFACE ${GMOCK_LIBRARIES} Threads::Threads)
- endif(GMOCK_LIBRARIES)
+ endif(GMOCK_LIBRARIES AND GMOCK_INCLUDE_DIR)
endif(NOT TARGET gmock)
if(NOT TARGET gmock_main)
--- /dev/null
+# Locate gio-2.0 (which needs glib-2.0 and gobject-2.0) and publish
+# GIO2.0_FOUND, GIO2.0_INCLUDE_DIRS and GIO2.0_LIBRARIES to the scope that
+# calls the helper below. GIO2.0_LIBRARIES also carries the gobject-2.0
+# libraries so that consumers link both.
+function(_GIO_2_0_import)
+  nnfw_find_package(GLib2.0 REQUIRED)
+  nnfw_find_package(GObject2.0 REQUIRED)
+
+  find_library(GIO_LIBRARIES
+    NAMES gio-2.0)
+
+  # The gio-2.0 requires glib-2.0 and accesses its header files based on
+  # the glib-2.0 include directory.
+  set(GIO_INCLUDE_DIRS ${GLIB2.0_INCLUDE_DIRS} ${GOBJECT2.0_INCLUDE_DIRS})
+
+  set(GIO_FOUND TRUE)
+
+  # Test the raw find_library() result, before the gobject-2.0 libraries are
+  # appended: a list such as "GIO_LIBRARIES-NOTFOUND;<gobject lib>" does not
+  # end in -NOTFOUND, so if() would treat it as true even though gio-2.0
+  # itself is missing.
+  if(NOT GIO_LIBRARIES)
+    set(GIO_FOUND FALSE)
+  endif(NOT GIO_LIBRARIES)
+
+  # gio-2.0 is exported together with gobject-2.0, so that library has to be
+  # available as well.
+  if(NOT GOBJECT2.0_LIBRARIES)
+    set(GIO_FOUND FALSE)
+  endif(NOT GOBJECT2.0_LIBRARIES)
+
+  if(NOT GIO_INCLUDE_DIRS)
+    set(GIO_FOUND FALSE)
+  endif(NOT GIO_INCLUDE_DIRS)
+
+  if(NOT GIO_FOUND)
+    message(STATUS "Failed to find gio-2.0")
+  endif(NOT GIO_FOUND)
+
+  set(GIO2.0_FOUND ${GIO_FOUND} PARENT_SCOPE)
+  set(GIO2.0_INCLUDE_DIRS ${GIO_INCLUDE_DIRS} PARENT_SCOPE)
+  set(GIO2.0_LIBRARIES ${GIO_LIBRARIES} ${GOBJECT2.0_LIBRARIES} PARENT_SCOPE)
+endfunction(_GIO_2_0_import)
+
+_GIO_2_0_import()
--- /dev/null
+# Locate the gio-unix-2.0 headers and publish GIO_UNIX_2.0_FOUND,
+# GIO_UNIX_2.0_INCLUDE_DIRS and GIO_UNIX_2.0_LIBRARIES to the scope that
+# calls the helper below.
+function(_GIO_UNIX_2_0_import)
+  nnfw_find_package(Gio2.0 REQUIRED)
+
+  find_path(GIO_UNIX_INCLUDE_DIR
+    NAMES gio/gunixfdlist.h
+    PATH_SUFFIXES gio-unix-2.0)
+
+  # gio-unix-2.0 requires gio-2.0; the libraries to link are the gio-2.0 ones.
+  set(GIO_UNIX_LIBRARIES ${GIO2.0_LIBRARIES})
+
+  # Both the libraries and the header directory must have resolved.
+  if(GIO_UNIX_LIBRARIES AND GIO_UNIX_INCLUDE_DIR)
+    set(GIO_UNIX_FOUND TRUE)
+  else()
+    set(GIO_UNIX_FOUND FALSE)
+    message(STATUS "Failed to find gio-unix-2.0")
+  endif()
+
+  # Export the results under the package-style names.
+  set(GIO_UNIX_2.0_FOUND ${GIO_UNIX_FOUND} PARENT_SCOPE)
+  set(GIO_UNIX_2.0_INCLUDE_DIRS ${GIO_UNIX_INCLUDE_DIR} PARENT_SCOPE)
+  set(GIO_UNIX_2.0_LIBRARIES ${GIO_UNIX_LIBRARIES} PARENT_SCOPE)
+endfunction(_GIO_UNIX_2_0_import)
+
+_GIO_UNIX_2_0_import()
+++ /dev/null
-function(_Eigen_import)
- nnas_find_package(EigenSource QUIET)
-
- if(NOT EigenSource_FOUND)
- set(TensorFlowEigen_1_13_1_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT EigenSource_FOUND)
-
- if(NOT TARGET eigen-tf-1.13.1)
- add_library(eigen-tf-1.13.1 INTERFACE)
- target_include_directories(eigen-tf-1.13.1 SYSTEM INTERFACE "${EigenSource_DIR}")
- # Add EIGEN_MPL2_ONLY to remove license issue posibility
- target_compile_definitions(eigen-tf-1.13.1 INTERFACE EIGEN_MPL2_ONLY)
- endif(NOT TARGET eigen-tf-1.13.1)
-
- set(TensorFlowEigen_1_13_1_FOUND TRUE PARENT_SCOPE)
-endfunction(_Eigen_import)
-
-_Eigen_import()
+++ /dev/null
-set(PACKAGE_VERSION "1.13.1")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
--- /dev/null
+# TensorFlowGpuConfig.cmake
+# return_unless(VAR): if the variable named by VAR evaluates to false, print a
+# diagnostic, set TensorFlowGpu_FOUND to FALSE in the parent scope and
+# return(). Because this is a macro, return() leaves the function that
+# invoked the macro, not only the macro body.
+macro(return_unless VAR)
+if(NOT ${VAR})
+ message("TensorFlowGpu: ${VAR} NOT TRUE")
+ set(TensorFlowGpu_FOUND FALSE PARENT_SCOPE)
+ return()
+endif(NOT ${VAR})
+endmacro(return_unless)
+
+# Checks that every source package needed by the TensorFlow Lite GPU delegate
+# is available and, unless a TensorFlowGpu target already exists, registers
+# the TensorFlowLiteGpu directory next to this file via add_extdirectory().
+# Each return_unless() leaves this function early (with TensorFlowGpu_FOUND
+# set to FALSE in the caller's scope) as soon as one package is missing.
+function(_Build_TfliteGpuDelagate_)
+ # TensorFlow 2.8.0 sources and the matching GEMMLowp / Eigen sources.
+ nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowSource_FOUND)
+
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowGEMMLowpSource_FOUND)
+
+ nnas_find_package(TensorFlowEigenSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowEigenSource_FOUND)
+
+ # NOTE(review): these three lookups use REQUIRED while the others use QUIET;
+ # presumably a missing package already aborts inside nnas_find_package(),
+ # which would make the following return_unless() a safety net - confirm.
+ nnas_find_package(AbseilSource REQUIRED)
+ return_unless(AbseilSource_FOUND)
+
+ nnas_find_package(Farmhash REQUIRED)
+ return_unless(Farmhash_FOUND)
+
+ nnas_find_package(Fp16Source REQUIRED)
+ return_unless(Fp16Source_FOUND)
+
+ # Graphics API sources/headers.
+ nnas_find_package(VulkanSource QUIET)
+ return_unless(VulkanSource_FOUND)
+
+ nnas_find_package(Opengl_HeadersSource QUIET)
+ return_unless(Opengl_HeadersSource_FOUND)
+
+ nnas_find_package(Egl_HeadersSource QUIET)
+ return_unless(Egl_HeadersSource_FOUND)
+
+ # Add the external directory only once; skip when the target is defined.
+ if(NOT TARGET TensorFlowGpu)
+ nnas_include(ExternalProjectTools)
+ add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLiteGpu" TensorFlowLiteGpu)
+ endif()
+ # Re-export the directory variables to the caller's scope.
+ set(TensorFlowSource_DIR ${TensorFlowSource_DIR} PARENT_SCOPE)
+ set(TensorFlowGpu_DIR ${TensorFlowGpu_DIR} PARENT_SCOPE)
+endfunction(_Build_TfliteGpuDelagate_)
+
+# Run the helper only when the GPU delegate build is requested; otherwise
+# report the package as not found.
+# NOTE(review): TensorFlowGpu_FOUND is set to TRUE right after the helper call
+# even if the helper returned early through return_unless(). The two set()
+# calls use PARENT_SCOPE from different scopes (function vs. this file), so
+# which value the caller finally sees should be confirmed.
+if(BUILD_TENSORFLOW_LITE_GPU)
+ _Build_TfliteGpuDelagate_()
+ set(TensorFlowGpu_FOUND TRUE PARENT_SCOPE)
+else(BUILD_TENSORFLOW_LITE_GPU)
+ set(TensorFlowGpu_FOUND FALSE PARENT_SCOPE)
+endif(BUILD_TENSORFLOW_LITE_GPU)
+++ /dev/null
-set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite)
-
-#
-# Tensorflow Lite library
-#
-file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c" "${TENSORFLOW_LITE_BASE}/*.cc" "${TENSORFLOW_LITE_BASE}/core/*.cc")
-file(GLOB TFLITE_CORE_TESTS "${TENSORFLOW_LITE_BASE}/*test*.cc")
-list(REMOVE_ITEM TFLITE_CORE_SRCS ${TFLITE_CORE_TESTS})
-
-file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
-file(GLOB_RECURSE TFLITE_KERNEL_TESTS "${TENSORFLOW_LITE_BASE}/kernels/*test*.cc")
-list(REMOVE_ITEM TFLITE_KERNEL_SRCS ${TFLITE_KERNEL_TESTS})
-
-file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
-file(GLOB TFLITE_LIB_TESTS "${TENSORFLOW_LITE_BASE}/c/*test*.cc")
-list(REMOVE_ITEM TFLITE_LIB_SRCS ${TFLITE_LIB_TESTS})
-
-file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c" "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
-file(GLOB TFLITE_API_TESTS "${TENSORFLOW_LITE_BASE}/core/api/*test*.cc")
-list(REMOVE_ITEM TFLITE_API_SRCS ${TFLITE_API_TESTS})
-
-file(GLOB TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/*.cc")
-file(GLOB TFLITE_PROFILING_TESTS "${TENSORFLOW_LITE_BASE}/profiling/*test*.cc")
-list(REMOVE_ITEM TFLITE_PROFILING_SRCS ${TFLITE_PROFILING_TESTS})
-
-# We will use our own summarizer
-list(REMOVE_ITEM TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/profile_summarizer.cc")
-list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
-
-list(APPEND TFLITE_SRCS "${FarmhashSource_DIR}/src/farmhash.cc")
-
-# externals for spectrogram
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c")
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c")
-
-list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${AbseilSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${GEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${FarmhashSource_DIR}/src")
-
-if(NEON2SSESource_FOUND)
- list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}")
-endif(NEON2SSESource_FOUND)
-
-add_library(tensorflow-lite STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_compile_definitions(tensorflow-lite PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK")
-set_property(TARGET tensorflow-lite PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(tensorflow-lite eigen-tf-1.13.1 flatbuffers::flatbuffers ${LIB_PTHREAD} dl)
-
-# Define TF_LITE_DISABLE_X86_NEON for debug build
-# If we upgrade NEON2SSE version, we can remove below line
-if(NEON2SSESource_FOUND)
- target_compile_definitions(tensorflow-lite PRIVATE $<$<CONFIG:Debug>:TF_LITE_DISABLE_X86_NEON>)
-endif(NEON2SSESource_FOUND)
-
-if(ANDROID)
- target_link_libraries(tensorflow-lite log)
- target_include_directories(tensorflow-lite PUBLIC "${NDK_DIR}/..")
-endif()
+++ /dev/null
-# NOTE This line prevents multiple definitions of tensorflow-lite target
-if(TARGET tensorflow-lite)
- set(TensorFlowLite_FOUND TRUE)
- return()
-endif(TARGET tensorflow-lite)
-
-if(BUILD_TENSORFLOW_LITE)
- macro(return_unless VAR)
- if(NOT ${VAR})
- set(TensorFlowLite_FOUND PARENT_SCOPE)
- return()
- endif(NOT ${VAR})
- endmacro(return_unless)
-
- # Required packages
- nnas_find_package(Abseil QUIET)
- return_unless(Abseil_FOUND)
- nnfw_find_package(TensorFlowEigen EXACT 1.13.1 QUIET)
- return_unless(TensorFlowEigen_1_13_1_FOUND)
- nnas_find_package(FarmhashSource QUIET)
- return_unless(FarmhashSource_FOUND)
- nnfw_find_package(FlatBuffers QUIET)
- return_unless(FlatBuffers_FOUND)
- nnas_find_package(GEMMLowpSource QUIET)
- return_unless(GEMMLowpSource_FOUND)
- nnas_find_package(TensorFlowSource EXACT 1.13.1 QUIET)
- return_unless(TensorFlowSource_FOUND)
- nnas_find_package(OouraFFTSource QUIET)
- return_unless(OouraFFTSource_FOUND)
-
- # Optional packages
- nnas_find_package(NEON2SSESource QUIET)
-
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite)
-
- set(TensorFlowLite_FOUND TRUE)
- return()
-endif(BUILD_TENSORFLOW_LITE)
-
-# Use pre-built TensorFlow Lite
-find_path(TFLITE_INCLUDE_DIR NAMES tensorflow/lite/interpreter.h)
-find_library(TFLITE_LIB NAMES tensorflow-lite)
-
-if(NOT TFLITE_INCLUDE_DIR)
- # Tizen install TensorFlow Lite 1.13.1 headers in /usr/include/tensorflow1
- find_path(TFLITE_INCLUDE_DIR NAMES tensorflow/lite/interpreter.h PATHS "/usr/include/tensorflow1")
- if(NOT TFLITE_INCLUDE_DIR)
- set(TensorFlowLite_FOUND FALSE)
- return()
- endif(NOT TFLITE_INCLUDE_DIR)
-endif(NOT TFLITE_INCLUDE_DIR)
-
-if(NOT TFLITE_LIB)
- set(TensorFlowLite_FOUND FALSE)
- return()
-endif(NOT TFLITE_LIB)
-
-message(STATUS "Found TensorFlow Lite: TRUE (include: ${TFLITE_INCLUDE_DIR}, lib: ${TFLITE_LIB}")
-
-# TODO Use IMPORTED target
-add_library(tensorflow-lite INTERFACE)
-target_include_directories(tensorflow-lite SYSTEM INTERFACE ${TFLITE_INCLUDE_DIR})
-target_link_libraries(tensorflow-lite INTERFACE ${TFLITE_LIB})
-find_package(Flatbuffers)
-if(Flatbuffers_FOUND)
- target_link_libraries(tensorflow-lite INTERFACE flatbuffers::flatbuffers)
-endif(Flatbuffers_FOUND)
-
-# Prefer -pthread to -lpthread
-set(THREADS_PREFER_PTHREAD_FLAG TRUE)
-set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
-find_package(Threads QUIET)
-
-if(Threads_FOUND)
- target_link_libraries(tensorflow-lite INTERFACE ${CMAKE_THREAD_LIBS_INIT})
-endif(Threads_FOUND)
-
-set(TensorFlowLite_FOUND TRUE)
+++ /dev/null
-set(PACKAGE_VERSION "1.13.1")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
-# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
+# Reference: https://github.com/tensorflow/tensorflow/blob/v2.8.0/tensorflow/lite/CMakeLists.txt
#
-# Tensorflow Lite library 2.3.0
+# Tensorflow Lite library 2.8.0
#
-set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite)
-
-file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
- "${TENSORFLOW_LITE_BASE}/*.cc"
- "${TENSORFLOW_LITE_BASE}/core/*.cc")
-
-file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
-
-file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
-
-file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c"
- "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
-
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/platform_profiler.cc")
-
-file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
-
-file(GLOB TFLITE_SCHEMA_UTIL_SRCS "${TENSORFLOW_LITE_BASE}/schema/*.cc")
-
-# Moved to kerenls/internal/utils
-#file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
-
-list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS})
-#list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_SCHEMA_UTIL_SRCS})
-
-# externals
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c")
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c")
-
-# Build with mmap? true
-# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
-set(BUILD_WITH_MMAP TRUE)
-if(${BUILD_WITH_MMAP})
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
-else()
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc")
+set(TFLITE_SOURCE_DIR ${TensorFlowSource_DIR}/tensorflow/lite)
+
+# Generate TensorFlow Lite FlatBuffer code.
+# We used to have an actual compilation logic with flatc but decided to use
+# schema_generated.h since flatc doesn't work with cross compilation.
+set(TFLITE_FLATBUFFERS_SCHEMA_DIR "${TFLITE_SOURCE_DIR}/schema")
+
+# populate_source_vars(SOURCE_DIR SOURCES_VAR [RECURSE] [FILTER <regex>...])
+# Appends the non-test C/C++ sources and headers found in SOURCE_DIR to the
+# list variable named by SOURCES_VAR.
+#   RECURSE  - also search sub-directories (GLOB_RECURSE instead of GLOB).
+#   FILTER   - extra regular expressions; matching files are excluded.
+macro(populate_source_vars SOURCE_DIR SOURCES_VAR)
+ cmake_parse_arguments(ARGS "RECURSE" "" "FILTER" ${ARGN})
+ if(ARGS_RECURSE)
+ set(GLOB_OP GLOB_RECURSE)
+ else()
+ set(GLOB_OP GLOB)
+ endif()
+ # Files ending in _test or test_util (.c/.cc/.h) are always excluded.
+ set(DEFAULT_FILE_FILTER ".*(_test|test_util)\\.(c|cc|h)$")
+ file(${GLOB_OP} FOUND_SOURCES "${SOURCE_DIR}/*.*")
+ # Keep only .c / .cc / .h files, then drop the default and caller filters.
+ list(FILTER FOUND_SOURCES INCLUDE REGEX ".*\\.(c|cc|h)$")
+ list(FILTER FOUND_SOURCES EXCLUDE REGEX "${DEFAULT_FILE_FILTER}")
+ foreach(FILE_FILTER ${ARGS_FILTER})
+ list(FILTER FOUND_SOURCES EXCLUDE REGEX "${FILE_FILTER}")
+ endforeach()
+ list(APPEND ${SOURCES_VAR} ${FOUND_SOURCES})
+endmacro()
+# Simplifies inclusion of non-test sources and headers from a directory
+# relative to TFLITE_SOURCE_DIR. See populate_source_vars() for the
+# description of arguments including and following SOURCES_VAR.
+macro(populate_tflite_source_vars RELATIVE_DIR SOURCES_VAR)
+ populate_source_vars(
+ "${TFLITE_SOURCE_DIR}/${RELATIVE_DIR}" ${SOURCES_VAR} ${ARGN}
+ )
+endmacro()
+
+# Build a list of source files to compile into the TF Lite library.
+populate_tflite_source_vars("." TFLITE_SRCS)
+
+# This particular file is excluded because the more explicit approach to enable
+# XNNPACK delegate is preferred to the weak-symbol one.
+list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*tflite_with_xnnpack\\.cc$")
+
+# Exclude Flex related files.
+list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*with_selected_ops\\.cc$")
+
+# Use MMAP
+list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*mmap_allocation_disabled\\.cc$")
+
+if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
+ list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*minimal_logging_android\\.cc$")
endif()
-
-# Build with nnapi? true
-# caution: this nnapi delegate comes from tflite, not ours.
-set(BUILD_WITH_NNAPI TRUE)
-if(${BUILD_WITH_NNAPI})
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/utils.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/serialization.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc")
-else()
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc")
+if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "iOS")
+ list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*minimal_logging_ios\\.cc$")
endif()
-# ios: we don't support ios
-list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc")
-
-# android
-if(NOT ANDROID)
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc")
+populate_tflite_source_vars("core" TFLITE_CORE_SRCS)
+populate_tflite_source_vars("core/api" TFLITE_CORE_API_SRCS)
+populate_tflite_source_vars("c" TFLITE_C_SRCS)
+populate_tflite_source_vars("delegates" TFLITE_DELEGATES_SRCS)
+
+# Enable NNAPI
+populate_tflite_source_vars("delegates/nnapi"
+TFLITE_DELEGATES_NNAPI_SRCS
+FILTER "(_test_list|_disabled)\\.(cc|h)$"
+)
+populate_tflite_source_vars(
+"nnapi" TFLITE_NNAPI_SRCS FILTER "(_disabled)\\.(cc|h)$"
+)
+
+# Disable XNNPack
+
+# Enable experimental support for resource (need for build success)
+populate_tflite_source_vars("experimental/resource"
+TFLITE_EXPERIMENTAL_RESOURCE_SRCS
+)
+
+# Enable Ruy
+populate_tflite_source_vars("experimental/ruy"
+ TFLITE_EXPERIMENTAL_RUY_SRCS
+ FILTER
+ ".*(test(_fast|_slow|_special_specs))\\.(cc|h)$"
+ ".*(benchmark|tune_tool|example)\\.(cc|h)$"
+)
+populate_tflite_source_vars("experimental/ruy/profiler"
+ TFLITE_EXPERIMENTAL_RUY_PROFILER_SRCS
+ FILTER ".*(test|test_instrumented_library)\\.(cc|h)$"
+)
+list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DTFLITE_WITH_RUY")
+
+populate_tflite_source_vars("kernels"
+ TFLITE_KERNEL_SRCS
+ FILTER "(.*_test_util_internal|test_.*|.*_ops_wrapper)\\.(cc|h)"
+)
+populate_tflite_source_vars("kernels/internal" TFLITE_KERNEL_INTERNAL_SRCS)
+populate_tflite_source_vars("kernels/internal/optimized"
+ TFLITE_KERNEL_INTERNAL_OPT_SRCS
+)
+populate_tflite_source_vars("kernels/internal/optimized/integer_ops"
+ TFLITE_KERNEL_INTERNAL_OPT_INTEGER_OPS_SRCS
+)
+populate_tflite_source_vars("kernels/internal/optimized/sparse_ops"
+ TFLITE_KERNEL_INTERNAL_OPT_SPARSE_OPS_SRCS
+)
+populate_tflite_source_vars("kernels/internal/reference"
+ TFLITE_KERNEL_INTERNAL_REF_SRCS
+)
+populate_tflite_source_vars("kernels/internal/reference/integer_ops"
+ TFLITE_KERNEL_INTERNAL_REF_INTEGER_OPS_SRCS
+)
+populate_tflite_source_vars("kernels/internal/reference/sparse_ops"
+ TFLITE_KERNEL_INTERNAL_REF_SPARSE_OPS_SRCS
+)
+set(TFLITE_PROFILER_SRCS ${TFLITE_SOURCE_DIR}/profiling/platform_profiler.cc)
+if(CMAKE_SYSTEM_NAME MATCHES "Android")
+ list(APPEND TFLITE_PROFILER_SRCS
+ ${TFLITE_SOURCE_DIR}/profiling/atrace_profiler.cc
+ )
endif()
-# exclude some source files
-file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc"
- "${TENSORFLOW_LITE_BASE}/*benchmark*.cc"
- "${TENSORFLOW_LITE_BASE}/*example*.cc"
- "${TENSORFLOW_LITE_BASE}/*tool*.cc")
-list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
-
-# exclude some kernels (requires python3-dev package)
-# TODO Enable these kernels by installing package on build system
-file(GLOB_RECURSE TFLITE_KERNEL_EXCLS "${TENSORFLOW_LITE_BASE}/kernels/variable_ops_wrapper.cc"
- "${TENSORFLOW_LITE_BASE}/kernels/gradient/*.cc"
- "${TENSORFLOW_LITE_BASE}/kernels/perception/*.cc")
-list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_KERNEL_EXCLS})
-
-# exclude kernel shim
-file(GLOB_RECURSE TFLITE_SHIM_EXCLS "${TENSORFLOW_LITE_BASE}/kernels/shim/*.cc")
-list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_SHIM_EXCLS})
+# Common include directories
+set(TFLITE_INCLUDE_DIRS
+ "${TENSORFLOW_SOURCE_DIR}"
+ "${TFLITE_FLATBUFFERS_SCHEMA_DIR}"
+)
# include headers
-list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TensorFlowGEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${Fp16Source_DIR}/include")
-#list(APPEND TFLITE_INCLUDES "${Pybind11Source_DIR}/include")
+list(APPEND TFLITE_INCLUDE_DIRS "${TensorFlowSource_DIR}")
+list(APPEND TFLITE_INCLUDE_DIRS "${TensorFlowGEMMLowpSource_DIR}")
+list(APPEND TFLITE_INCLUDE_DIRS "${Fp16Source_DIR}/include")
+#list(APPEND TFLITE_INCLUDE_DIRS "${Pybind11Source_DIR}/include")
+list(APPEND TFLITE_INCLUDE_DIRS "${CpuInfoSource_DIR}")
if(NEON2SSESource_FOUND)
- list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}")
+ list(APPEND TFLITE_INCLUDE_DIRS "${NEON2SSESource_DIR}")
endif(NEON2SSESource_FOUND)
-add_library(tensorflow-lite-2.8.0 STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite-2.8.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_include_directories(tensorflow-lite-2.8.0 PRIVATE ${CpuInfoSource_DIR})
-target_compile_definitions(tensorflow-lite-2.8.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO")
+# TFLite library
+add_library(tensorflow-lite-2.8.0 STATIC
+ ${TFLITE_CORE_API_SRCS}
+ ${TFLITE_CORE_SRCS}
+ ${TFLITE_C_SRCS}
+ ${TFLITE_DELEGATES_NNAPI_SRCS}
+ ${TFLITE_DELEGATES_SRCS}
+ ${TFLITE_EXPERIMENTAL_RESOURCE_SRCS}
+ ${TFLITE_EXPERIMENTAL_RUY_PROFILER_SRCS}
+ ${TFLITE_EXPERIMENTAL_RUY_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_OPT_INTEGER_OPS_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_OPT_SPARSE_OPS_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_OPT_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_REF_INTEGER_OPS_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_REF_SPARSE_OPS_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_REF_SRCS}
+ ${TFLITE_KERNEL_INTERNAL_SRCS}
+ ${TFLITE_KERNEL_SRCS}
+ ${TFLITE_NNAPI_SRCS}
+ ${TFLITE_SRCS}
+ ${TFLITE_PROFILER_SRCS}
+ ${TFLITE_SOURCE_DIR}/kernels/internal/utils/sparsity_format_converter.cc
+ ${TFLITE_SOURCE_DIR}/schema/schema_utils.cc
+ ${OouraFFTSource_DIR}/fftsg.c
+ ${OouraFFTSource_DIR}/fftsg2d.c
+)
+target_include_directories(tensorflow-lite-2.8.0
+ SYSTEM PUBLIC
+ ${TFLITE_INCLUDE_DIRS}
+)
+
+target_compile_definitions(tensorflow-lite-2.8.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO -DNNAPI_VERBOSE_VALIDATION")
set_property(TARGET tensorflow-lite-2.8.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
target_link_libraries(tensorflow-lite-2.8.0 eigen flatbuffers::flatbuffers ruy abseil farmhash ${LIB_PTHREAD} dl)
-if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
+if(NOT ANDROID)
target_link_libraries(tensorflow-lite-2.8.0 rt)
endif()
if(ANDROID)
target_link_libraries(tensorflow-lite-2.8.0 log)
- target_include_directories(tensorflow-lite-2.8.0 PUBLIC "${NDK_DIR}/..")
+ #target_include_directories(tensorflow-lite-2.8.0 PUBLIC "${NDK_DIR}/..")
endif()
-if(BUILD_TENSORFLOW_LITE_2_8_0)
+# NOTE This line prevents multiple definitions of tensorflow-lite target
+if(TARGET tensorflow-lite-2.8.0)
+ set(TensorFlowLite_FOUND TRUE)
+ return()
+endif(TARGET tensorflow-lite-2.8.0)
+
+if(BUILD_TENSORFLOW_LITE)
macro(return_unless VAR)
if(NOT ${VAR})
message("TFLite 2.8: ${VAR} NOT TRUE")
- set(TensorFlowLite_2_8_0_FOUND FALSE PARENT_SCOPE)
+ set(TensorFlowLite_FOUND FALSE)
return()
endif(NOT ${VAR})
endmacro(return_unless)
nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
return_unless(TensorFlowSource_FOUND)
- # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/tensorflow/workspace.bzl
- nnas_find_package(AbseilSource QUIET)
- return_unless(AbseilSource_FOUND)
+ # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.8.0/tensorflow/workspace2.bzl
+ nnas_find_package(Abseil QUIET)
+ return_unless(Abseil_FOUND)
nnfw_find_package(Eigen QUIET)
return_unless(Eigen_FOUND)
nnas_find_package(Farmhash QUIET)
nnas_include(ExternalProjectTools)
add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite-2.8.0)
- set(TensorFlowLite_2_8_0_FOUND TRUE)
+ set(TensorFlowLite_FOUND TRUE)
return()
endif()
+
+# Use pre-built TensorFlow Lite
+# This part runs only when the target does not exist yet and BUILD_TENSORFLOW_LITE
+# is off (both earlier branches end with return()).
+# It wraps an installed header/library pair in an INTERFACE target named
+# tensorflow-lite-2.8.0 and reports the result through TensorFlowLite_FOUND.
+find_path(TFLITE_INCLUDE_DIR NAMES tensorflow/lite/c/c_api.h)
+find_library(TFLITE_LIB NAMES tensorflow2-lite)
+
+if(NOT TFLITE_INCLUDE_DIR)
+ # Tizen install TensorFlow Lite 2.8 headers in /usr/include/tensorflow2
+ find_path(TFLITE_INCLUDE_DIR NAMES tensorflow/lite/c/c_api.h PATHS "/usr/include/tensorflow2")
+ if(NOT TFLITE_INCLUDE_DIR)
+ set(TensorFlowLite_FOUND FALSE)
+ return()
+ endif(NOT TFLITE_INCLUDE_DIR)
+endif(NOT TFLITE_INCLUDE_DIR)
+
+# Headers alone are not enough: give up when the library is missing
+if(NOT TFLITE_LIB)
+ set(TensorFlowLite_FOUND FALSE)
+ return()
+endif(NOT TFLITE_LIB)
+
+message(STATUS "Found TensorFlow Lite: TRUE (include: ${TFLITE_INCLUDE_DIR}, lib: ${TFLITE_LIB})")
+
+# TODO Use IMPORTED target
+add_library(tensorflow-lite-2.8.0 INTERFACE)
+target_include_directories(tensorflow-lite-2.8.0 SYSTEM INTERFACE ${TFLITE_INCLUDE_DIR})
+target_link_libraries(tensorflow-lite-2.8.0 INTERFACE ${TFLITE_LIB})
+# Flatbuffers is optional here: it is linked only when find_package locates it
+find_package(Flatbuffers)
+if(Flatbuffers_FOUND)
+ target_link_libraries(tensorflow-lite-2.8.0 INTERFACE flatbuffers::flatbuffers)
+endif(Flatbuffers_FOUND)
+
+# Prefer -pthread to -lpthread
+set(THREADS_PREFER_PTHREAD_FLAG TRUE)
+set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
+find_package(Threads QUIET)
+
+# Thread library is added to the interface only when FindThreads succeeds
+if(Threads_FOUND)
+ target_link_libraries(tensorflow-lite-2.8.0 INTERFACE ${CMAKE_THREAD_LIBS_INIT})
+endif(Threads_FOUND)
+
+set(TensorFlowLite_FOUND TRUE)
--- /dev/null
+#
+# Tensorflow Lite GPU delegate library 2.8.0
+#
+
+set(LIB_TENSORFLOW_GPU_DELEGATE "TensorFlowGpu")
+
+#TENSORFLOWGPU_SOURCE_DIR
+set(TENSORFLOWSOURCE_DIR ${TensorFlowSource_DIR})
+set(TENSORFLOW_LITE_BASE ${TENSORFLOWSOURCE_DIR}/tensorflow/lite)
+set(REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE "${TENSORFLOW_LITE_BASE}/delegates/gpu")
+
+set(SRC_BASE "${REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE}")
+file(GLOB GPU_CL_SRC_LIST "${SRC_BASE}/cl/*.cc"
+ "${SRC_BASE}/cl/kernels/*.cc"
+ "${SRC_BASE}/common/*.cc"
+ "${SRC_BASE}/common/selectors/*.cc"
+ "${SRC_BASE}/common/selectors/default/*.cc"
+ "${SRC_BASE}/common/task/*.cc"
+ "${SRC_BASE}/common/tasks/*.cc"
+ "${SRC_BASE}/common/tasks/special/*.cc"
+ "${SRC_BASE}/common/memory_management/*.cc"
+ "${SRC_BASE}/common/transformations/*.cc"
+ )
+
+file(GLOB REMOVE_TEST_SRCS "${SRC_BASE}/cl/*_test*.cc"
+ "${SRC_BASE}/cl/testing/*.cc"
+ "${SRC_BASE}/cl/kernels/*_test*.cc"
+ "${SRC_BASE}/common/*_test*.cc"
+ "${SRC_BASE}/common/tasks/*_test*.cc"
+ "${SRC_BASE}/common/transformations/*_test*.cc"
+ )
+# Not available
+file(GLOB REMOVE_SRCS "${SRC_BASE}/cl/*gl*.cc"
+ "${SRC_BASE}/cl/gpu_api_delegate.cc"
+ "${SRC_BASE}/cl/serialization.cc"
+ "${SRC_BASE}/common/lstm_parser.cc"
+ "${SRC_BASE}/common/model_builder.cc"
+ "${SRC_BASE}/common/model_builder_helper.cc"
+ "${SRC_BASE}/common/object_reader.cc"
+ "${SRC_BASE}/common/quantization_util.cc"
+ "${SRC_BASE}/common/memory_management/*_test.cc"
+ )
+
+list(APPEND GPU_CL_SRC_LIST "${TENSORFLOW_LITE_BASE}/experimental/acceleration/compatibility/android_info.cc")
+
+list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_TEST_SRCS})
+list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_SRCS})
+list(APPEND TFLITE_GPU_SRCS ${GPU_CL_SRC_LIST})
+
+add_library(${LIB_TENSORFLOW_GPU_DELEGATE} STATIC ${TFLITE_GPU_SRCS})
+
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Opencl_Headers_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Fp16Source_DIR}/include")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TensorFlowSource_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TensorFlowGEMMLowpSource_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TensorFlowEigenSource_DIR}")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${VulkanSource_DIR}/include")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Opengl_HeadersSource_DIR}/api")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Egl_HeadersSource_DIR}/api")
+
+target_link_libraries(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE abseil farmhash fp16 flatbuffers)
+
+# GL codes are not used on gpu_cl
+target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "-DCL_DELEGATE_NO_GL")
+target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "-DTFLITE_GPU_BINARY_RELEASE" "-DEGL_NO_X11")
+
+# deprecated-copy warning on header (gcc 9.4.0)
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9.4)
+ target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PUBLIC "-Wno-deprecated-copy")
+endif()
+
+# Applying PIC first, currently used on gpu_cl only
+set_target_properties(${LIB_TENSORFLOW_GPU_DELEGATE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
--- /dev/null
+#!/bin/bash
+
+import "build.configuration"
+
+# This command is used to download test materials on host environment
+# by using test command on host
+
+# Common variables
+DRIVER_PATH=$NNFW_PROJECT_PATH/tests/scripts
+CACHE_PATH=${CACHE_PATH:-$WORKSPACE_PATH/out/test/cache}
+
+# Delegate to the test driver's prepare-model command file
+# NOTE(review): COMMAND is not assigned in this file - presumably it is set by
+# the script that sources this one; confirm before relying on the message below
+COMMAND_FILE=$DRIVER_PATH/command/prepare-model
+if [[ ! -f $COMMAND_FILE ]]; then
+ echo "ERROR: '$COMMAND' is not supported"
+ exit 255
+fi
+
+# Quoted so that the path and every forwarded argument stay a single word
+# (no word splitting or glob expansion of the caller's arguments)
+source "$COMMAND_FILE" "$@"
profile = profile.tizen
[profile.tizen]
-repos = repo.tizen_base,repo.tizen_mobile
-buildroot = /home/GBS-ROOT/
+repos = repo.base, repo.unified
-[repo.tizen_mobile]
-url = http://download.tizen.org/snapshots/tizen/unified/latest/repos/standard/packages/
+[repo.unified]
+url = http://download.tizen.org/snapshots/TIZEN/Tizen-7.0/Tizen-7.0-Unified/latest/repos/standard/packages/
-[repo.tizen_base]
-url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/
+[repo.base]
+url = http://download.tizen.org/snapshots/TIZEN/Tizen-7.0/Tizen-7.0-Base/latest/repos/standard/packages/
--- /dev/null
+cmake_minimum_required(VERSION 3.15)
+
+project(onert-micro)
+
+enable_testing()
+
+set(CMAKE_CXX_STANDARD 14)
+
+set(CMAKE_SKIP_BUILD_RPATH FALSE)
+set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
+set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/")
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+if (NOT DEFINED TARGET_ARCH)
+ set(TARGET_ARCH "armv7em")
+endif()
+
+if (NOT DEFINED TARGET_CPU)
+ set(TARGET_CPU "cortex-m7")
+endif()
+
+if (NOT DEFINED TARGET_OS)
+ set(TARGET_OS "generic")
+endif()
+
+include(utils.cmake)
+
+# Look for Google Test without failing the configure step when it is absent
+nnas_find_package(GTest QUIET)
+
+# Tests default to ON exactly when Google Test was found
+option(ENABLE_TEST "Build Tests using Google Test" ${GTest_FOUND})
+
+# Explicitly requesting tests without Google Test is a configuration error
+if(${ENABLE_TEST} AND NOT ${GTest_FOUND})
+ message(FATAL_ERROR "Google Test is required to enable test")
+endif(${ENABLE_TEST} AND NOT ${GTest_FOUND})
+
+# Coverage is only allowed together with tests
+option(ENABLE_COVERAGE "Build for coverage test" OFF)
+if(${ENABLE_COVERAGE} AND NOT ${ENABLE_TEST})
+ message(FATAL_ERROR "Test should be enabled to measure test coverage")
+endif(${ENABLE_COVERAGE} AND NOT ${ENABLE_TEST})
+
+# CTest is pulled in only for test-enabled builds
+if(${ENABLE_TEST})
+ include(CTest)
+endif(${ENABLE_TEST})
+
+###
+### Target
+###
+add_library(onert_micro_common INTERFACE)
+if(ENABLE_STRICT_BUILD)
+ target_compile_options(onert_micro_common INTERFACE -Werror -Wall -Wextra -Wno-reorder)
+endif(ENABLE_STRICT_BUILD)
+
+add_library(onert_micro_coverage INTERFACE)
+if(ENABLE_COVERAGE)
+ target_compile_options(onert_micro_coverage INTERFACE -g -O0 -fprofile-arcs -ftest-coverage)
+ target_link_libraries(onert_micro_coverage INTERFACE gcov)
+endif(ENABLE_COVERAGE)
+
+add_subdirectory("${NNAS_PROJECT_SOURCE_DIR}/onert-micro" "${CMAKE_BINARY_DIR}/onert-micro")
--- /dev/null
+#
+# Platform independent compile flag setting
+#
+# flags for build type: debug, release
+set(CMAKE_C_FLAGS_DEBUG "-O0 -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG")
+set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
+
+#
+# Platform specific compile flag setting
+#
+if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+ include("${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+endif()
+
+#
+# Apply compile flags
+# note: this should be placed after cmake/buildtool/config/config_xxx.cmake files
+#
+# add common flags
+# Each entry of FLAGS_COMMON is appended to both the C and the C++ flag strings.
+# NOTE(review): FLAGS_COMMON / FLAGS_CONLY / FLAGS_CXXONLY are not defined in this
+# file - presumably filled by the platform config included above; an unset list
+# simply makes the loop a no-op.
+foreach(FLAG ${FLAGS_COMMON})
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
+
+# add c flags
+# Entries of FLAGS_CONLY go to the C flag string only
+foreach(FLAG ${FLAGS_CONLY})
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+endforeach()
+
+# add cxx flags
+# Entries of FLAGS_CXXONLY go to the C++ flag string only
+foreach(FLAG ${FLAGS_CXXONLY})
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
--- /dev/null
+# Platform specific configuration
+# note: this should be placed before default setting for option setting priority
+# (platform specific setting have higher priority)
+#
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/options/options_${TARGET_PLATFORM}.cmake")
+
+###
+### Configuration
+###
+# Default switches for downloading / locally building external dependencies.
+# All of them default to ON.
+option(DOWNLOAD_RUY "Download ruy source" ON)
+option(DOWNLOAD_EIGEN "Download Eigen source" ON)
+option(DOWNLOAD_GEMMLOWP "Download GEMM low precision library source" ON)
+option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
+option(BUILD_FLATBUFFERS "Locally build Flatbuffers from the downloaded source" ON)
+option(DOWNLOAD_TENSORFLOW "Download TensorFlow source" ON)
+
+# Google Test is needed only for test builds
+option(DOWNLOAD_GTEST "Download Google Test source" ON)
+option(BUILD_GTEST "Build Google Test from the downloaded source" ON)
--- /dev/null
+set(CMAKE_SYSTEM_NAME Generic)
+
+set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_C_COMPILER "${C_COMPILER}")
+set(CMAKE_CXX_COMPILER "${CXX_COMPILER}")
+set(CMAKE_ASM_COMPILER "${ASM_COMPILER}")
+set(CMAKE_OBJCOPY "${OBJCOPY}")
+
+set(TARGET_CPU "cortex-m4" CACHE STRING "Target CPU")
+
+# Convert TARGET_CPU=Cortex-M33+nofp+nodsp into
+# - CMAKE_SYSTEM_PROCESSOR=cortex-m33
+# - TARGET_CPU_FEATURES=nofp;nodsp
+string(REPLACE "+" ";" TARGET_CPU_FEATURES ${TARGET_CPU})
+list(POP_FRONT TARGET_CPU_FEATURES CMAKE_SYSTEM_PROCESSOR)
+string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} CMAKE_SYSTEM_PROCESSOR)
+
+# Executables are produced as ELF images
+set(CMAKE_EXECUTABLE_SUFFIX ".elf")
+# CMAKE_TRY_COMPILE_TARGET_TYPE is already set to STATIC_LIBRARY near the top of
+# this file, so it is not repeated here.
+# Programs are never searched in the find root path; libraries and headers
+# are searched only there.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+# Select C/C++ version
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_CXX_STANDARD 14)
+
+# Compile options
+add_compile_options(
+ -mcpu=${TARGET_CPU}
+ -mthumb
+ "$<$<CONFIG:DEBUG>:-gdwarf-3>"
+ "$<$<COMPILE_LANGUAGE:CXX>:-funwind-tables;-frtti;-fexceptions>")
+
+# Compile definitions
+add_compile_definitions(
+ "$<$<NOT:$<CONFIG:DEBUG>>:NDEBUG>")
+
+# Link options
+add_link_options(
+ -mcpu=${TARGET_CPU}
+ -mthumb
+ --specs=nosys.specs)
+
+# Set floating point unit
+# Selection order:
+#  1. an explicit "+fp" in TARGET_CPU selects the hard float ABI
+#  2. an explicit "+nofp" in TARGET_CPU selects the soft float ABI
+#  3. otherwise cortex-m33 / cortex-m55 default to hard
+#  4. every other processor defaults to soft
+if("${TARGET_CPU}" MATCHES "\\+fp")
+ set(FLOAT hard)
+elseif("${TARGET_CPU}" MATCHES "\\+nofp")
+ set(FLOAT soft)
+elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR
+ "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55")
+ set(FLOAT hard)
+else()
+ set(FLOAT soft)
+endif()
+
+# Every branch above assigns FLOAT, so the ABI flag is always passed to both
+# the compiler and the linker
+if (FLOAT)
+ add_compile_options(-mfloat-abi=${FLOAT})
+ add_link_options(-mfloat-abi=${FLOAT})
+endif()
+
+# Compilation warnings
+add_compile_options(
+ -Wno-all
+)
--- /dev/null
+#
+# linux common compile options
+#
+
+# Disable annoying ABI compatibility warning.
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
+ list(APPEND FLAGS_CXXONLY "-Wno-psabi")
+endif()
+
+# lib pthread as a variable (pthread must be disabled on android)
+set(LIB_PTHREAD pthread)
--- /dev/null
+#
+# x86_64 linux compile options
+#
+message(STATUS "Building for x86-64 Linux")
+
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# SIMD for x86
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-msse4"
+ )
--- /dev/null
+#
+# armv7em generic cmake options
+#
--- /dev/null
+#
+# armv7em generic cmake options
+#
--- /dev/null
+#
+# armv8-m generic cmake options
+#
--- /dev/null
+#
+# x86_64 linux cmake options
+#
--- /dev/null
+set(NNAS_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." CACHE
+ INTERNAL "Where to find nnas top-level source directory"
+ )
+
+set(NNAS_EXTERNALS_DIR
+ "${NNAS_PROJECT_SOURCE_DIR}/externals" CACHE
+ INTERNAL "Where to download external dependencies"
+ )
+set(ONERT_MICRO_OVERLAY_DIR "${CMAKE_BINARY_DIR}/overlay" CACHE
+ INTERNAL "Where locally built external dependencies are installed")
+
+# Share package build script with runtime
+set(EXT_OVERLAY_DIR ${ONERT_MICRO_OVERLAY_DIR})
+
+# This allows find_package to access configurations installed inside overlay
+list(APPEND CMAKE_PREFIX_PATH "${EXT_OVERLAY_DIR}")
+
+# nnas_include(PREFIX)
+# Includes the shared module <NNAS_PROJECT_SOURCE_DIR>/infra/cmake/modules/<PREFIX>.cmake.
+# Being a macro, the included file runs in the caller's variable scope.
+macro(nnas_include PREFIX)
+ include("${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/modules/${PREFIX}.cmake")
+endmacro(nnas_include)
+
+# nnas_find_package(PREFIX [find_package options...])
+# Config-mode find_package restricted to <NNAS_PROJECT_SOURCE_DIR>/infra/cmake/packages:
+# NO_DEFAULT_PATH disables every default search location.
+# Extra arguments (e.g. QUIET, EXACT <version>) are forwarded unchanged via ARGN.
+macro(nnas_find_package PREFIX)
+ find_package(${PREFIX}
+ CONFIG NO_DEFAULT_PATH
+ PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages
+ ${ARGN})
+endmacro(nnas_find_package)
+
+# nnas_find_package_folder(PREFIX FIND_FOLDER [find_package options...])
+# Same as nnas_find_package, but FIND_FOLDER is searched in addition to the
+# shared infra/cmake/packages directory. Extra arguments are forwarded via ARGN.
+macro(nnas_find_package_folder PREFIX FIND_FOLDER)
+ find_package(${PREFIX}
+ CONFIG NO_DEFAULT_PATH
+ PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages ${FIND_FOLDER}
+ ${ARGN})
+endmacro(nnas_find_package_folder)
+
+###
+### CMake configuration
+###
+if(NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Type of build" FORCE)
+endif(NOT CMAKE_BUILD_TYPE)
+message(STATUS "Use '${CMAKE_BUILD_TYPE}' configuration")
+
+# identify platform: HOST_PLATFORM, TARGET_PLATFORM and related
+# note: this should be placed before flags and options setting
+nnas_include(IdentifyPlatform)
+
+# Configuration flags
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/CfgOptionFlags.cmake")
+
+# apply compilation flags
+# NOTE this should be after all option
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/ApplyCompileFlags.cmake")
fi
# The default preset
-PRESET="20220323"
+PRESET="20221125"
# Test is enabled by default
DISABLE_TEST=false
REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
REQUIRED_UNITS+=("one-cmds")
REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
# Dependent modules needed for build
REQUIRED_UNITS+=("circlechef")
REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
REQUIRED_UNITS+=("one-cmds")
REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
# Dependent modules needed for build
REQUIRED_UNITS+=("circlechef")
--- /dev/null
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+# new official preset will be added when new programs are ready
+
+PRESET="20221125"
+
+# Configure the nncc build for this preset.
+# Collects the units to build into REQUIRED_UNITS and runs cmake on infra/nncc in
+# the current directory. Reads NNCC_INSTALL_PREFIX, NNAS_PROJECT_PATH,
+# EXTRA_OPTIONS and (optionally) NPROC from the caller; join_by is expected to be
+# provided by the sourcing script.
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Compute
+ REQUIRED_UNITS+=("luci-compute")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Python interface for circle schema
+ REQUIRED_UNITS+=("pics")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
+ REQUIRED_UNITS+=("circle-opselector")
+
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
+ # A caller-supplied NPROC wins; otherwise count processors from /proc/cpuinfo
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+ # TODO Use "nncc configure" and "nncc build"
+ # Externals get half of the processors, but never fewer than one thread
+ # (plain NPROC/2 evaluates to 0 on a single-processor host)
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$(( NPROC > 1 ? NPROC / 2 : 1 )) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+# Post-build install step of this preset.
+# Copies the preset-specific tf2nnpkg script (res/tf2nnpkg.<PRESET>) to
+# <NNAS_INSTALL_PREFIX>/bin/tf2nnpkg with mode 755; -D creates missing parent
+# directories and -T treats the destination as a file name.
+function preset_install()
+{
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
--- /dev/null
+#!/bin/bash
+
+PRESET="20221125"
+
+# Configure the nncc build for this preset with the "MSYS Makefiles" generator.
+# Collects the units to build into REQUIRED_UNITS and runs cmake on infra/nncc in
+# the current directory with tests and Google Test disabled. Reads
+# NNCC_INSTALL_PREFIX, NNAS_PROJECT_PATH, EXTRA_OPTIONS and (optionally) NPROC
+# from the caller; join_by is expected to be provided by the sourcing script.
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
+ # Data I/O
+ REQUIRED_UNITS+=("dio-hdf5")
+ # Compute
+ REQUIRED_UNITS+=("luci-compute")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Python interface for circle schema
+ REQUIRED_UNITS+=("pics")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+ REQUIRED_UNITS+=("dalgona")
+ REQUIRED_UNITS+=("visq")
+
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
+ # A caller-supplied NPROC wins; otherwise count processors from /proc/cpuinfo
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+ # TODO Use "nncc configure" and "nncc build"
+ # Externals get half of the processors, but never fewer than one thread
+ # (plain NPROC/2 evaluates to 0 on a single-processor host)
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$(( NPROC > 1 ? NPROC / 2 : 1 )) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+# Post-build install step of this preset.
+# Moves the built DLLs from <prefix>/lib into <prefix>/bin, removes <prefix>/lib
+# and installs the preset-specific tf2nnpkg script.
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ # ${VAR:?} aborts when NNCC_INSTALL_PREFIX is unset or empty, so the commands
+ # below can never operate on the root-level /lib directory by accident
+ mv "${NNCC_INSTALL_PREFIX:?}"/lib/*.dll "${NNCC_INSTALL_PREFIX}/bin"
+ rm -rf "${NNCC_INSTALL_PREFIX:?}/lib"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # Though you have to install tensorflow to run 'tf2tfliteV2',
+ # tensorflow can't be installed in mingw. First, You can install tensorflow
+ # from Window native CMD(run as administrator) with python virtual environment.
+ # And, you must copy it to "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
--- /dev/null
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+# command_exists NAME...
+# Returns 1 when called without arguments; otherwise returns the status of
+# `command -v` for the given names, with all output discarded.
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+# Print the command-line help and terminate the script with exit status 255.
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+# Recognized options consume their value with `shift 2`; the resulting
+# INFO_FILE, GRAPHDEF_FILE and OUTPUT_DIR are exported. `--v2` only switches
+# TF_INTERFACE. `--help` prints the usage text, which exits the script.
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ # Unrecognized arguments are echoed and skipped, not treated as errors
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+# Validate the required arguments. Every expansion is quoted so that empty
+# values and paths containing whitespace are tested as one word.
+if [ -z "${GRAPHDEF_FILE}" ] || [ ! -e "${GRAPHDEF_FILE}" ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z "${INFO_FILE}" ] || [ ! -e "${INFO_FILE}" ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z "${OUTPUT_DIR}" ]; then
+ echo "output directory is not specified. Please check -o is correct.."
+ exit 2
+fi
+
+# Model name is the graphdef file name without its last extension
+FILE_BASE=$(basename "${GRAPHDEF_FILE}")
+MODEL_NAME="${FILE_BASE%.*}"
+# Scratch directory for intermediate circle files; removed on any exit
+TMPDIR=$(mktemp -d)
+trap '{ rm -rf "$TMPDIR"; }' EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e "${VIRTUALENV_LINUX}" ]; then
+ source "${VIRTUALENV_LINUX}"
+elif [ -e "${VIRTUALENV_WINDOWS}" ]; then
+ source "${VIRTUALENV_WINDOWS}"
+fi
+
+# parse inputs, outputs from info file
+# Second comma-separated field of each input/output line, cut at ':' and joined with ','
+INPUT=$(awk -F, '/^input/ { print $2 }' "${INFO_FILE}" | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' "${INFO_FILE}" | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+# Bracketed text of every input line, blanks removed, joined with ':'
+INPUT_SHAPES=$(grep ^input "${INFO_FILE}" | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+# The command is assembled as an array so that each argument reaches
+# one-import-bcq as exactly one word, whatever characters the paths contain
+ONE_IMPORT_BCQ_SCRIPT=("${ROOT}/bin/one-import-bcq" "${TF_INTERFACE}")
+ONE_IMPORT_BCQ_SCRIPT+=(-i "${GRAPHDEF_FILE}")
+ONE_IMPORT_BCQ_SCRIPT+=(-o "${TMPDIR}/${MODEL_NAME}.tmp.circle")
+ONE_IMPORT_BCQ_SCRIPT+=(-I "${INPUT}")
+ONE_IMPORT_BCQ_SCRIPT+=(-O "${OUTPUT}")
+# Shapes are passed only when the info file provides them
+if [ -n "${INPUT_SHAPES}" ]; then
+ ONE_IMPORT_BCQ_SCRIPT+=(-s "${INPUT_SHAPES}")
+fi
+
+"${ONE_IMPORT_BCQ_SCRIPT[@]}"
+
+# optimize
+"${ROOT}/bin/circle2circle" --resolve_customop_add "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg" -o "${OUTPUT_DIR}" -m "${TMPDIR}/${MODEL_NAME}.circle"
+++ /dev/null
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare ndk
-if [ ! -n "$NDK_DIR" ]; then
- export NDK_DIR=$ROOT_PATH/tools/cross/ndk/r20/ndk
- echo "It will use default external path"
-fi
-
-export TARGET_OS=android
-export CROSS_BUILD=1
-export BUILD_TYPE=release
-make -f Makefile.template install
export BACKENDS=$1
if [[ "$2" == "" ]]; then
- $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
+ $INSTALL_PATH/test/onert-test verify-tflite \
--reportdir=$ROOT_PATH/$3
else
- $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
+ $INSTALL_PATH/test/onert-test verify-tflite \
--list=$2 \
--reportdir=$ROOT_PATH/$3
fi
# Backup original nnapi_gtest.skip
# TODO Pass skiplist to test-driver.sh
- SKIPLIST_FILE="${INSTALL_PATH}/unittest/nnapi_gtest.skip"
+ SKIPLIST_FILE="${INSTALL_PATH}/nnapi-gtest/nnapi_gtest.skip"
BACKUP_FILE="${SKIPLIST_FILE}.backup"
if [[ "$2" != "" ]]; then
cp ${SKIPLIST_FILE} ${BACKUP_FILE}
export BACKENDS=$1
$INSTALL_PATH/test/onert-test unittest \
--reportdir=$ROOT_PATH/$3 \
- --unittestdir=$INSTALL_PATH/unittest
+ --unittestdir=$INSTALL_PATH/nnapi-gtest
unset BACKENDS
# TODO Pass skiplist to test-driver.sh
popd > /dev/null
}
-
-# $1: (required) backend
-# $2: (required) test list file relative path from nnfw root directory
-# pass empty string if there is no skiplist
-# $3: (required) relative path to report from nnfw root directory
-function NNAPIFrontendTest()
-{
- [[ $# -ne 3 ]] && echo "NNAPIFrontendTest: Invalid function argument setting" && exit 1
-
- pushd ${ROOT_PATH} > /dev/null
-
- export BACKENDS=$1
- if [[ "$2" == "" ]]; then
- $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
- --reportdir=$ROOT_PATH/$3
- else
- $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
- --list=$2 \
- --reportdir=$ROOT_PATH/$3
- fi
- unset BACKENDS
-
- popd > /dev/null
-}
DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone"
DEBUG_BUILD_ITEMS+=";safemain;mio-circle04;mio-tflite280;dio-hdf5"
+DEBUG_BUILD_ITEMS+=";luci-compute"
DEBUG_BUILD_ITEMS+=";tflite2circle"
DEBUG_BUILD_ITEMS+=";luci"
DEBUG_BUILD_ITEMS+=";luci-interpreter"
DEBUG_BUILD_ITEMS+=";circle-eval-diff"
DEBUG_BUILD_ITEMS+=";circle-partitioner;circle-part-driver;circle-operator"
DEBUG_BUILD_ITEMS+=";circle-verify"
-DEBUG_BUILD_ITEMS+=";circle-tensordump"
+DEBUG_BUILD_ITEMS+=";circle-tensordump;circle-opselector"
DEBUG_BUILD_ITEMS+=";tflchef;circlechef"
DEBUG_BUILD_ITEMS+=";common-artifacts"
DEBUG_BUILD_ITEMS+=";circle2circle-dredd-recipe-test"
DEBUG_BUILD_ITEMS+=";record-minmax-conversion-test"
DEBUG_BUILD_ITEMS+=";tf2tfliteV2;tf2tfliteV2-conversion-test"
DEBUG_BUILD_ITEMS+=";tflite2circle-conversion-test"
-DEBUG_BUILD_ITEMS+=";pota-quantization-value-test"
+DEBUG_BUILD_ITEMS+=";pota-quantization-value-test;pics"
DEBUG_BUILD_ITEMS+=";circle-part-value-test"
DEBUG_BUILD_ITEMS+=";circle-quantizer-dredd-recipe-test"
DEBUG_BUILD_ITEMS+=";circle-operator-test"
+DEBUG_BUILD_ITEMS+=";circle-interpreter;circle-interpreter-test"
+DEBUG_BUILD_ITEMS+=";dalgona;dalgona-test"
+DEBUG_BUILD_ITEMS+=";visq"
+++ /dev/null
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
- echo "It will use default rootfs path"
-else
- DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
- DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=aarch64"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
- make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
+++ /dev/null
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
- echo "It will use default rootfs path"
-else
- DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
- DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
- make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
+++ /dev/null
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
- echo "It will use default rootfs path"
-else
- DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
- DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-DOCKER_ENV_VARS+=" -e BUILD_TYPE=release"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
- make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
+++ /dev/null
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
- echo "It will use default rootfs path"
-else
- DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
- DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-NNAS_WORKSPACE=${NNAS_WORKSPACE:-build}
-if [[ -z "${ARCHIVE_PATH}" ]]; then
- ARCHIVE_PATH=${NNAS_WORKSPACE}/archive
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-DOCKER_ENV_VARS+=" -e COVERAGE_BUILD=1"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-# TODO use command instead of makefile
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="cp -nv Makefile.template Makefile && \
- make all install build_coverage_suite"
-./nnfw docker-run bash -c "$CMD"
-
-mkdir -p ${ARCHIVE_PATH}
-# TODO change workspace usage in makefile
-mv Product/out/coverage-suite.tar.gz ${ARCHIVE_PATH}/
-
-popd > /dev/null
+++ /dev/null
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-export BUILD_OPTIONS
-
-CMD="export OPTIONS='$BUILD_OPTIONS' && \
- export BUILD_TYPE=Release && \
- cp -nv Makefile.template Makefile && \
- make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-# Model download server setting
-if [[ -z $MODELFILE_SERVER ]]; then
- echo "Need model file server setting"
- exit 1
-fi
-
-export DOCKER_ENV_VARS=" -e MODELFILE_SERVER=$MODELFILE_SERVER"
-./nnfw docker-run-user ./infra/scripts/test_ubuntu_runtime.sh --backend cpu
-./nnfw docker-run-user ./infra/scripts/test_ubuntu_runtime.sh --interp
-
-popd > /dev/null
+++ /dev/null
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# prepare rootfs
-if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
- echo "It will use default rootfs path"
-else
- DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
- DOCKER_ENV_VARS+=" -e ROOTFS_DIR=/opt/rootfs"
-fi
-
-# mount volume (or directory) for externals
-if [ -n "$EXTERNAL_VOLUME" ]; then
- DOCKER_VOLUMES+=" -v $EXTERNAL_VOLUME:/externals"
- DOCKER_ENV_VARS+=" -e EXTERNAL_VOLUME=/externals"
-else
- echo "It will use default external path"
-fi
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-DOCKER_ENV_VARS+=" -e TARGET_ARCH=armv7l"
-DOCKER_ENV_VARS+=" -e CROSS_BUILD=1"
-DOCKER_ENV_VARS+=" -e TARGET_OS=tizen"
-DOCKER_ENV_VARS+=" -e BUILD_TYPE=release"
-
-# Mirror server setting
-if [[ -z $EXTERNAL_DOWNLOAD_SERVER ]]; then
- echo "It will not use mirror server"
-fi
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-CMD="export OPTIONS+=' -DGENERATE_RUNTIME_NNAPI_TESTS=ON' && \
- cp -nv Makefile.template Makefile && \
- make all install build_test_suite"
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
+++ /dev/null
-#!/bin/bash
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-GBS_RPM_DIR=$ROOT_PATH/Product/out/rpm
-mkdir -p $GBS_RPM_DIR
-DOCKER_VOLUMES=" -v $GBS_RPM_DIR:/opt/rpm"
-
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name for tizen gbs build"
- DOCKER_IMAGE_NAME="nnfw_docker_tizen"
-fi
-
-DOCKER_ENV_VARS=" --privileged"
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-CMD="gbs -c $ROOT_PATH/infra/nnfw/config/gbs.conf build \
- -A armv7l --profile=profile.tizen --clean --include-all --define '$GBS_DEFINE' && \
- cp -rf /home/GBS-ROOT/local/repos/tizen/armv7l/RPMS/*.rpm /opt/rpm/"
-
-export DOCKER_ENV_VARS
-export DOCKER_VOLUMES
-./nnfw docker-run bash -c "$CMD"
-
-popd > /dev/null
REQUIRED_UNITS+=("hermes" "hermes-std")
# loco IR and related utilities
REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+# Compute
+REQUIRED_UNITS+=("luci-compute")
# Circle compiler library (.circle -> .circle)
REQUIRED_UNITS+=("luci")
# Flatbuffer I/O
+++ /dev/null
-#!/bin/bash
-
-# coverage test data: ${ARCHIVE_PATH}/coverage-data.tar.gz
-
-[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
-
-CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-ROOT_PATH="$CURRENT_PATH/../../"
-
-# docker image name
-# - for bionic, use DOCKER_IMAGE_NAME="nnfw/one-devtools:bionic"
-# - for focal, use DOCKER_IMAGE_NAME="nnfw/one-devtools:focal"
-if [[ -z $DOCKER_IMAGE_NAME ]]; then
- echo "It will use default docker image name"
-fi
-
-NNAS_WORKSPACE=${NNAS_WORKSPACE:-build}
-if [[ -z "${ARCHIVE_PATH}" ]]; then
- ARCHIVE_PATH=${NNAS_WORKSPACE}/archive
-fi
-
-set -e
-
-pushd $ROOT_PATH > /dev/null
-
-tar -zxf ${ARCHIVE_PATH}/coverage-data.tar.gz
-
-CMD="GCOV_PATH=arm-linux-gnueabihf-gcov NNAS_WORKSPACE=Product ./nnas gen-coverage-report runtime compute &&
- tar -zcf coverage/coverage_report.tar.gz coverage/html &&
- python runtime/3rdparty/lcov-to-cobertura-xml/lcov_cobertura.py coverage/coverage.info -o coverage/nnfw_coverage.xml"
-
-./nnfw docker-run-user bash -c "$CMD"
-
-popd > /dev/null
NNPackageTest ${BACKEND} "Product/out/test/list/nnpkg_test_list.armv7l-linux.${BACKEND}"
done
-# Interpreter test
-export DISABLE_COMPILE=1
-NNPackageTest "interp" "Product/out/test/list/nnpkg_test_list.noarch.interp"
unset DISABLE_COMPILE
TFLiteModelVerification "acl_cl" "Product/out/test/list/tflite_comparator.armv7l.acl_cl.list" "report/acl_cl/trace"
unset TRACE_FILEPATH
-# Interpreter
-./infra/scripts/test_ubuntu_runtime.sh --interp
-
# nnpackage test suite
if [[ -e ${ARCHIVE_PATH}/nnpkg-test-suite.tar.gz ]]; then
tar -zxf ${ARCHIVE_PATH}/nnpkg-test-suite.tar.gz -C ./
--- /dev/null
+#!/bin/bash
+
+# Abort on the first failing command, including failures inside pipelines
+set -eo pipefail
+
+CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# Quoted so that a checkout path containing spaces still resolves
+ROOT_PATH="$(cd "${CURRENT_PATH}/../../" && pwd)"
+
+# Install path on CI
+INSTALL_PATH="$ROOT_PATH/Product/out"
+MODEL_PATH="${INSTALL_PATH}/npud-gtest/models"
+
+# Install dbus configuration file
+DBUS_CONF="${INSTALL_PATH}/share/org.tizen.npud.conf"
+mkdir -p /usr/share/dbus-1/system.d/
+cp "${DBUS_CONF}" /usr/share/dbus-1/system.d/
+
+# Restart dbus after the configuration file has been installed
+service dbus restart
+
+# Download the model archive named by MODELFILE and unpack it into ${MODEL_PATH}.
+# Exits with status 1 when MODELFILE is empty or does not end in ".tar.gz".
+function TestPrepared()
+{
+  if [[ -z "${MODELFILE}" ]]; then
+    echo "Model file is not set. Set MODELFILE to the URL of a tar.gz model archive."
+    exit 1
+  fi
+
+  if [[ "${MODELFILE: -7}" != ".tar.gz" ]]; then
+    echo "The file format is not supported."
+    echo "Supported format: tar.gz"
+    exit 1
+  fi
+
+  mkdir -p "${MODEL_PATH}"
+  # Only -o names the output file (combining it with -O is contradictory);
+  # -f makes curl fail on HTTP errors instead of saving the error page.
+  curl -o model.tar.gz -fkLsS "${MODELFILE}"
+  tar -zxf model.tar.gz -C "${MODEL_PATH}"
+  # The archive is not needed once it has been extracted
+  rm -f model.tar.gz
+}
+
+# Remove the directory holding the downloaded models.
+function TestCleanUp()
+{
+  # ${VAR:?} aborts instead of running rm with an empty path
+  rm -rf "${MODEL_PATH:?}"
+}
+
+# Run the npud gtest binary from ${ROOT_PATH}.
+# Exits the script with the binary's exit status when the tests fail.
+function NpudTest()
+{
+  pushd "${ROOT_PATH}" > /dev/null
+
+  # "|| EXITCODE=$?" records the status; with a bare call the script's `set -e`
+  # would abort here and the check below would never run.
+  EXITCODE=0
+  "${INSTALL_PATH}/npud-gtest/npud_gtest" || EXITCODE=$?
+
+  popd > /dev/null
+
+  if [ ${EXITCODE} -ne 0 ]; then
+    exit ${EXITCODE}
+  fi
+}
+
+# Remove the downloaded models on every exit path, including a failing test run
+trap TestCleanUp EXIT
+
+TestPrepared
+
+# The two paths are passed to the test run through its environment
+DEVICE_MODULE_PATH=${INSTALL_PATH}/lib GTEST_MODEL_PATH=${MODEL_PATH} NpudTest
BACKEND="cpu"
TEST_OS="linux"
TEST_PLATFORM="$TEST_ARCH-$TEST_OS"
-TFLITE_LOADER="1"
LINEAR_ONLY="0"
RUN_INTERP="0"
-NNAPI_FRONTEND="0"
function Usage()
{
echo ""
echo "Options:"
echo " --backend <BACKEND> Runtime backend to test (default: ${BACKEND})"
- echo " --nnapi-frontend NNAPI Frontend test"
echo " --linear-only Use Linear executor only"
}
BACKEND=$(echo ${1#*=} | tr '[:upper:]' '[:lower:]')
shift
;;
- --tflite-loader)
- TFLITE_LOADER="1"
- NNAPI_FRONTEND="1" # For CI test
- echo "[INFO] \"--tflite-loader\" argument is deprecated"
- shift
- ;;
- --nnapi-frontend)
- NNAPI_FRONTEND="1"
- shift
- ;;
--linear-only)
LINEAR_ONLY="1"
shift
;;
- --interp)
- RUN_INTERP="1"
- shift;
- ;;
*)
# Ignore
shift
CheckTestPrepared
-if [ $RUN_INTERP = "1" ]; then
- TEST_PLATFORM="noarch"
- TEST_ARCH="noarch"
- BACKEND="interp"
- echo "[[ Interpreter test ]]"
-else
- echo "[[ ${TEST_PLATFORM}: ${BACKEND} backend test ]]"
-fi
+echo "[[ ${TEST_PLATFORM}: ${BACKEND} backend test ]]"
-UNITTEST_SKIPLIST="Product/out/unittest/nnapi_gtest.skip.${TEST_PLATFORM}.${BACKEND}"
+UNITTEST_SKIPLIST="Product/out/nnapi-gtest/nnapi_gtest.skip.${TEST_PLATFORM}.${BACKEND}"
TFLITE_TESTLIST="Product/out/test/list/tflite_comparator.${TEST_ARCH}.${BACKEND}.list"
REPORT_BASE="report/${BACKEND}"
EXECUTORS=("Linear" "Dataflow" "Parallel")
if [ $LINEAR_ONLY = "1" ]; then
EXECUTORS=("Linear")
fi
-if [ $RUN_INTERP = "1" ]; then
- EXECUTORS=("Interpreter")
-fi
for EXECUTOR in "${EXECUTORS[@]}";
do
echo "[EXECUTOR]: ${EXECUTOR}"
REPORT_PATH="${REPORT_BASE}/${EXECUTOR}"
- if [ $EXECUTOR = "Interpreter" ]; then
- export DISABLE_COMPILE=1
- BACKEND=""
- else
- export EXECUTOR="${EXECUTOR}"
- fi
+ export EXECUTOR="${EXECUTOR}"
NNAPIGTest "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_PATH}"
TFLiteModelVerification "${BACKEND}" "${TFLITE_TESTLIST}" "${REPORT_PATH}"
- if [ $EXECUTOR = "Interpreter" ]; then
- unset DISABLE_COMPILE
- else
- unset EXECUTOR
- fi
+ unset EXECUTOR
done
-
-# TODO Support more backends
-NNAPI_FRONTEND_TESTLIST="Product/out/test/list/nnapi_test.${TEST_ARCH}.list"
-if [[ $NNAPI_FRONTEND = "1" ]]; then
- NNAPIFrontendTest "${BACKEND}" "${NNAPI_FRONTEND_TESTLIST}" "${REPORT_BASE}/nnapi/${EXECUTOR}"
-fi
echo ""
echo "==== Run standalone unittest begin ===="
echo ""
-Product/out/test/onert-test unittest --unittestdir=Product/out/unittest_standalone
+Product/out/test/onert-test unittest --unittestdir=Product/out/unittest
echo ""
echo "==== Run standalone unittest end ===="
echo ""
# Get the intersect of framework test list files
TESTLIST_PREFIX="Product/out/test/list/tflite_comparator.${TEST_ARCH}"
-SKIPLIST_PREFIX="Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}"
+SKIPLIST_PREFIX="Product/out/nnapi-gtest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}"
sort $TESTLIST_PREFIX.${BACKENDS[0]}.list > $TESTLIST_PREFIX.intersect.list
sort $SKIPLIST_PREFIX.${BACKENDS[0]} > $SKIPLIST_PREFIX.union
for BACKEND in "${BACKENDS[@]:1}"; do
export OP_BACKEND_FullyConnected="acl_neon"
export ACL_LAYOUT="NCHW"
export RUY_THREADS=4
-NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
+NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/nnapi-gtest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.list" "report/mixed"
{
# download tflite model files
pushd $HOST_HOME
- tests/scripts/models/run_test.sh --download=on --run=off
+ TEMP_PATH=$(mktemp -d)
+ CACHE_PATH=$TEMP_PATH/cache
+ mkdir -p $CACHE_PATH
+ ./nnfw prepare-model --cachedir=$CACHE_PATH
# TODO Since this command removes model file(.zip),
# We must always download the file unlike model file(.tflite).
# Because caching applies only to tflite file.
- find tests -name "*.zip" -exec rm {} \;
- tar -zcf cache.tar.gz -C tests/scripts/models cache
- $SDB_CMD push cache.tar.gz $TEST_ROOT/.
- rm -rf cache.tar.gz
- $SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT/Product/out/test/models
+ find $CACHE_PATH -name "*.zip" -exec rm {} \;
+ tar -zcf $TEMP_PATH/cache.tar.gz -C $TEMP_PATH cache
+ $SDB_CMD push $TEMP_PATH/cache.tar.gz $TEST_ROOT/
+ rm -rf $TEMP_PATH
+ $SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT/Product/out/test
popd
}
${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend cpu"
${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_mixed.sh"
- ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --interp"
else
mkdir -p ${GCOV_DIR}
rm -rf ${GCOV_DIR}/*
${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend cpu"
${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime_mixed.sh"
- ${SDB_CMD} shell /bin/bash -c "GCOV_PREFIX_STRIP=${GCOV_PREFIX_STRIP} IGNORE_MD5=1 TEST_ARCH=armv7l ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --interp"
# More test to check coverage
${SDB_CMD} shell "rm -rf ${GCOV_DATA_PATH} && mkdir -p ${GCOV_DATA_PATH}"
--- /dev/null
+# Executable names of the ARM bare-metal toolchain
+set(ARM_C_COMPILER "arm-none-eabi-gcc")
+set(ARM_CXX_COMPILER "arm-none-eabi-g++")
+set(ARM_ASM_COMPILER "arm-none-eabi-gcc")
+set(ARM_OBJCOPY "arm-none-eabi-objcopy")
+
+# Leave this directory when the C cross compiler cannot be located
+find_program(ARM_C_COMPILER_PATH ${ARM_C_COMPILER})
+if(NOT ARM_C_COMPILER_PATH)
+  message(STATUS "Build luci-micro: FALSE(ARM compiler is NOT FOUND)")
+  return()
+endif()
+
+# Leave this directory when FlatBuffers 2.0 is not available
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+if(NOT FlatBuffers_FOUND)
+  message(STATUS "Build luci-micro: FALSE(FlatBuffers 2.0 NOT FOUND)")
+  return()
+endif()
+
+message(STATUS "Build luci-micro: TRUE")
+
+# Circle schema (version 0.4) the reader code is generated from
+set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.4/circle_schema.fbs")
+
+# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.h" instead of "circle_schema_generated.h"
+add_custom_command(
+  OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+  COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+  WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+  DEPENDS "${SCHEMA_FILE}"
+)
+
+# Generate the schema sources from the copied schema.fbs
+FlatBuffers_Target(
+  luci_micro_circle_schema
+  OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/circle-generated/circle"
+  INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+  SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+  SCHEMA_FILES "schema.fbs"
+)
+
+# Set before add_subdirectory so that the reader subdirectory can see it
+set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/include")
+add_subdirectory(luci-interpreter/src/core/reader)
+
+# Choosing Kernel: reference mcu, optimized cmsisnn, optimized linux
+# KERNELS picks the PAL directory; an unset or empty value falls back to mcu and any
+# other unknown value stops processing of this directory.
+if (NOT KERNELS)
+  message(STATUS "KERNELS variable is not defined, default reference mcu kernels will be used")
+  set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/mcu")
+elseif("${KERNELS}" STREQUAL "mcu")
+  message(STATUS "ONERT_MICRO will use reference mcu kernels")
+  set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/mcu")
+elseif("${KERNELS}" STREQUAL "cmsisnn")
+  message(STATUS "ONERT_MICRO will use optimized cmsisnn kernels")
+  set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/cmsisnn")
+elseif("${KERNELS}" STREQUAL "linux")
+  message(STATUS "ONERT_MICRO will use optimized linux kernels")
+  set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/pal/linux")
+else()
+  # Report the rejected value so that a typo is easy to spot in the configure log
+  message(STATUS "Build onert-micro: FAILED (Unknown KERNELS value '${KERNELS}'. Choose one of the following options: mcu, cmsisnn, linux)")
+  return()
+endif()
+
+# The static memory manager is not available yet: requesting it stops processing of
+# this directory, otherwise the dynamic memory manager is used.
+if (USE_STATIC_ALLOC)
+  # TODO: enable it
+  message(STATUS "Build onert-micro: FAILED (Static Memory Manager is not supported yet)")
+  return()
+else()
+  # This branch is also taken when USE_STATIC_ALLOC is defined but false
+  message(STATUS "USE_STATIC_ALLOC is not enabled, default dynamic memory manager will be used")
+endif()
+
+# Cache definitions passed to the nested CMake run that configures the standalone
+# ARM build. They are independent -D definitions, so their order does not matter.
+set(CMAKE_ARM_OPTIONS
+  # Toolchain
+  "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/onert-micro/cmake/buildtool/config/arm-none-eabi-gcc.cmake"
+  -DC_COMPILER=${ARM_C_COMPILER}
+  -DCXX_COMPILER=${ARM_CXX_COMPILER}
+  -DASM_COMPILER=${ARM_ASM_COMPILER}
+  -DOBJCOPY=${ARM_OBJCOPY}
+  -DTARGET_CPU=${TARGET_CPU}
+  -DTARGET_ARCH=${TARGET_ARCH}
+  # Source, externals and generated-code locations
+  "-DNNAS_ROOT=${NNAS_PROJECT_SOURCE_DIR}"
+  "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}"
+  "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}"
+  "-DEXT_OVERLAY_DIR=${CMAKE_CURRENT_BINARY_DIR}/../../overlay"
+  "-DFlatbuffers_DIR=${CMAKE_CURRENT_BINARY_DIR}/../../overlay/lib/cmake/flatbuffers"
+  "-DLUCI_INTERPRETER_PAL_DIR=${LUCI_INTERPRETER_PAL_DIR}"
+  "-DGENERATED_INCLUDE_DIR=${CMAKE_CURRENT_BINARY_DIR}/gen"
+  # Build switches
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+  -DLUCI_INTERPRETER_STATIC=ON
+  -DLUCI_STATIC=ON
+  -DBUILD_CMSIS_NN_FUNCTIONS=ON
+  -DENABLE_TEST=OFF
+  -DBUILD_GTEST=OFF
+  -DENABLE_STRICT_BUILD=OFF
+)
+
+# Build only the kernels listed in the generated file when a model was given
+if(GENERATE_KERNELS_LIST_FROM)
+  set(GENERATED_KERNELS_LIST_PATH "${LUCI_INTERPRETER_PAL_DIR}/GeneratedKernelsToBuild.lst")
+  list(APPEND CMAKE_ARM_OPTIONS
+    "-DLUCI_INTERPRETER_KERNELS_BUILD_LIST=${GENERATED_KERNELS_LIST_PATH}"
+  )
+endif()
+
+# DIS_QUANT is applied to this build and forwarded to the nested ARM build
+if(DIS_QUANT)
+  message(STATUS "ONERT-MICRO will not use part for QUANTIZED models")
+  add_definitions(-DDIS_QUANT)
+  list(APPEND CMAKE_ARM_OPTIONS "-DDIS_QUANT=ON")
+endif()
+
+# DIS_FLOAT is applied to this build and forwarded to the nested ARM build
+if(DIS_FLOAT)
+  message(STATUS "ONERT-MICRO will not use part for FLOAT models")
+  add_definitions(-DDIS_FLOAT)
+  list(APPEND CMAKE_ARM_OPTIONS "-DDIS_FLOAT=ON")
+endif()
+
+# Separate build tree for the nested standalone ARM configuration
+set(MICRO_ARM_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/standalone_arm")
+file(MAKE_DIRECTORY "${MICRO_ARM_BUILD_DIR}")
+
+# CMakeCache.txt of the nested tree is the declared output of the configure step
+set(MICRO_ARM_BUILD_DEPENDENCY "${MICRO_ARM_BUILD_DIR}/CMakeCache.txt")
+
+# Configure the standalone project with the options collected in CMAKE_ARM_OPTIONS
+add_custom_command(
+  OUTPUT "${MICRO_ARM_BUILD_DEPENDENCY}"
+  COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/standalone" ${CMAKE_ARM_OPTIONS}
+  WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/standalone/CMakeLists.txt"
+  VERBATIM
+)
+
+add_custom_target(
+  luci_interpreter_micro_arm_cmake
+  DEPENDS "${MICRO_ARM_BUILD_DEPENDENCY}"
+)
+
+# Generate KernelsToBuild list from circle model
+if(GENERATE_KERNELS_LIST_FROM)
+  # Host tool that writes the kernel list for the given model
+  add_executable(generator_kernels_list_exec helpers/GenerateKernelsListHelper.cpp)
+  target_include_directories(
+    generator_kernels_list_exec
+    PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/include"
+  )
+  target_link_libraries(
+    generator_kernels_list_exec
+    luci_micro_circle_reader
+    luci_micro_circle_schema
+  )
+
+  # Run the tool as part of the default build
+  add_custom_target(
+    generate_kernels_list ALL
+    COMMAND generator_kernels_list_exec ${GENERATE_KERNELS_LIST_FROM} ${GENERATED_KERNELS_LIST_PATH}
+    COMMENT "Generating KernelsToBuild list"
+  )
+  add_dependencies(generate_kernels_list luci_micro_circle_reader)
+
+  # The nested ARM configuration must wait for the generated list
+  add_dependencies(luci_interpreter_micro_arm_cmake generate_kernels_list)
+endif()
+
+# Remove the one-shot configuration options from the cmake cache so that a later
+# configure run does not silently reuse them.
+# USE_STATIC_ALLOC is the option this file actually reads; USE_STATIC_KERNEL is kept
+# in the list for backward compatibility.
+foreach(ONE_SHOT_OPTION IN ITEMS
+    GENERATE_KERNELS_LIST_FROM
+    KERNELS
+    USE_STATIC_KERNEL
+    USE_STATIC_ALLOC
+    DIS_QUANT
+    DIS_FLOAT)
+  unset(${ONE_SHOT_OPTION} CACHE)
+endforeach()
+
+# Static library produced by the nested ARM build
+set(MICRO_ARM_BINARY "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/libluci_interpreter_micro.a")
+
+# Build the library inside the nested tree once it is configured and the schema exists
+# NOTE(review): CPU_COUNT is not set in this file; presumably the enclosing build
+#               provides it - confirm, since an empty value leaves a bare "-j"
+add_custom_command(
+  OUTPUT "${MICRO_ARM_BINARY}"
+  COMMAND "${CMAKE_MAKE_PROGRAM}" luci_interpreter_micro -j ${CPU_COUNT}
+  WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+  DEPENDS luci_interpreter_micro_arm_cmake luci_micro_circle_schema
+  VERBATIM
+)
+
+add_custom_target(
+  luci_interpreter_micro_arm
+  DEPENDS "${MICRO_ARM_BINARY}"
+)
+
+add_subdirectory(eval-driver)
+
+# Everything below is only set up when BUILD_TEST is defined
+# NOTE(review): the check is on definition, not on value, so BUILD_TEST=OFF also
+#               enables this part - confirm this is intended
+if(NOT DEFINED BUILD_TEST)
+  return()
+endif()
+
+#MBED OS QEMU build
+nnas_find_package(MbedOSSource EXACT 6.15 QUIET)
+if(NOT MbedOSSource_FOUND)
+  message(STATUS "Skipping luci-micro: MbedOSSource not found")
+  return()
+endif()
+
+# Nested build tree for the mbed-os based test
+set(MBED_OS_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/tests/mbed-os")
+file(MAKE_DIRECTORY "${MBED_OS_BUILD_DIR}")
+
+set(MBED_OS_BUILD_DEPENDENCY "${MBED_OS_BUILD_DIR}/CMakeCache.txt")
+set(ONERTMICRO_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/onert-micro")
+
+# Step 1: configure the mbed-os test project
+add_custom_command(
+  OUTPUT "${MBED_OS_BUILD_DEPENDENCY}"
+  COMMAND
+    "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/tests/mbed-os"
+    -DMICRO_ARM_BUILD_DIR=${MICRO_ARM_BUILD_DIR}
+    -DMbedOSSource_DIR=${MbedOSSource_DIR}
+    -DFlatBuffersSource_DIR=${FlatBuffersSource_DIR}
+    -DONERTMICRO_SRC_DIR=${ONERTMICRO_SRC_DIR}
+  WORKING_DIRECTORY "${MBED_OS_BUILD_DIR}"
+  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/tests/mbed-os/CMakeLists.txt"
+  VERBATIM
+)
+add_custom_target(mbed_os_cmake DEPENDS "${MBED_OS_BUILD_DEPENDENCY}")
+
+# Step 2: build the mbed-os static library
+set(MBED_OS_BINARY "${MBED_OS_BUILD_DIR}/libmbed_os.a")
+add_custom_command(
+  OUTPUT "${MBED_OS_BINARY}"
+  COMMAND "${CMAKE_MAKE_PROGRAM}" mbed_os -j ${CPU_COUNT}
+  WORKING_DIRECTORY "${MBED_OS_BUILD_DIR}"
+  DEPENDS mbed_os_cmake
+  VERBATIM
+)
+add_custom_target(mbed_os_arm DEPENDS "${MBED_OS_BINARY}")
+
+# Step 3: build the test binary; it depends on mbed-os, the test main.cpp and the
+#         interpreter library
+set(BUILD_TEST_BINARY "${MBED_OS_BUILD_DIR}/build_test.bin")
+add_custom_command(
+  OUTPUT "${BUILD_TEST_BINARY}"
+  COMMAND "${CMAKE_MAKE_PROGRAM}" build_test -j ${CPU_COUNT}
+  WORKING_DIRECTORY "${MBED_OS_BUILD_DIR}"
+  DEPENDS mbed_os_arm "${CMAKE_CURRENT_SOURCE_DIR}/tests/mbed-os/main.cpp" ${MICRO_ARM_BINARY}
+  VERBATIM
+)
+add_custom_target(onert_micro_build_test_arm DEPENDS "${BUILD_TEST_BINARY}")
--- /dev/null
+# Sources of the evaluation driver executable
+set(SRCS_EVAL_TESTER Driver.cpp)
+add_executable(onert_micro_eval_driver ${SRCS_EVAL_TESTER})
+
+# This variable is needed to separate standalone interpreter libraries from the libraries used in driver
+set(READER_SUFFIX "_driver")
+
+# Build the interpreter sources again inside this directory's own binary tree
+add_subdirectory(
+  ${NNAS_PROJECT_SOURCE_DIR}/onert-micro/luci-interpreter
+  ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter
+)
+
+target_include_directories(
+  onert_micro_eval_driver
+  PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/luci-interpreter/include"
+)
+target_link_libraries(onert_micro_eval_driver PUBLIC luci_interpreter_micro)
+
+install(TARGETS onert_micro_eval_driver DESTINATION bin)
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci_interpreter/Interpreter.h>
+
+#include <stdexcept>
+#include <cstdlib>
+#include <fstream>
+#include <vector>
+#include <string>
+#include <iostream>
+
+namespace
+{
+
+using DataBuffer = std::vector<char>;
+
+// Fill `data` with `data_size` bytes read from the binary file `filename`.
+// Throws std::runtime_error when the file cannot be opened or the read fails.
+void readDataFromFile(const std::string &filename, char *data, size_t data_size)
+{
+  std::ifstream input(filename, std::ifstream::binary);
+  if (input.fail())
+  {
+    throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+  }
+
+  input.read(data, data_size);
+  if (input.fail())
+  {
+    throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+  }
+}
+
+// Write `data_size` bytes from `data` to the binary file `filename`.
+// Throws std::runtime_error when the file cannot be opened or the write fails.
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+  std::ofstream output(filename, std::ofstream::binary);
+  if (output.fail())
+  {
+    throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+  }
+
+  output.write(data, data_size);
+  if (output.fail())
+  {
+    throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+  }
+}
+
+} // namespace
+
+/*
+ * @brief EvalDriver main
+ *
+ * Driver for testing luci-interpreter
+ *
+ * Loads the circle model named by argv[1], fills <num_inputs> input tensors from the
+ * files "<input_prefix><index>", runs one inference and writes output 0 to
+ * "<output_file>0".
+ *
+ * Returns EXIT_SUCCESS on success and EXIT_FAILURE on wrong usage; file errors are
+ * reported by throwing std::runtime_error.
+ */
+int entry(int argc, char **argv)
+{
+  if (argc != 5)
+  {
+    std::cerr
+      << "Usage: " << argv[0]
+      << " <path/to/circle/model> <num_inputs> <path/to/input/prefix> <path/to/output/file>\n";
+    return EXIT_FAILURE;
+  }
+
+  const char *filename = argv[1];
+  const char *input_prefix = argv[3];
+  const char *output_file = argv[4];
+
+  // Parse <num_inputs> strictly: atoi() would silently turn garbage into 0
+  char *num_inputs_end = nullptr;
+  const long parsed_num_inputs = std::strtol(argv[2], &num_inputs_end, 10);
+  const auto num_inputs = static_cast<int32_t>(parsed_num_inputs);
+  if (num_inputs_end == argv[2] || *num_inputs_end != '\0' || parsed_num_inputs < 0 ||
+      num_inputs != parsed_num_inputs)
+  {
+    std::cerr << "ERROR: <num_inputs> must be a non-negative integer, got \"" << argv[2]
+              << "\"\n";
+    return EXIT_FAILURE;
+  }
+
+  std::ifstream file(filename, std::ios::binary | std::ios::in);
+  if (!file.good())
+  {
+    throw std::runtime_error("Failed to open file");
+  }
+
+  // Determine the model size; tellg() reports -1 on failure, which must not be used
+  // as a buffer size
+  file.seekg(0, std::ios::end);
+  const std::streamoff fileSize = file.tellg();
+  if (fileSize < 0)
+  {
+    throw std::runtime_error("Failed to read file");
+  }
+  file.seekg(0, std::ios::beg);
+
+  // allocate the buffer
+  DataBuffer model_data(static_cast<size_t>(fileSize));
+
+  // read the data
+  file.read(model_data.data(), fileSize);
+  if (file.fail())
+  {
+    throw std::runtime_error("Failed to read file");
+  }
+
+  // Create interpreter.
+  luci_interpreter::Interpreter interpreter(model_data.data());
+
+  // Set input.
+  // Data for n'th input is read from ${input_prefix}n
+  // (ex: Add.circle.input0, Add.circle.input1 ..)
+  int num_inference = 1;
+  for (int j = 0; j < num_inference; ++j)
+  {
+    for (int32_t i = 0; i < num_inputs; i++)
+    {
+      auto input_data = reinterpret_cast<char *>(interpreter.allocateInputTensor(i));
+      readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data,
+                       interpreter.getInputDataSizeByIndex(i));
+    }
+
+    // Do inference.
+    interpreter.interpret();
+  }
+
+  // Get output.
+  // Only the first output is written (num_outputs is fixed to 1)
+  int num_outputs = 1;
+  for (int i = 0; i < num_outputs; i++)
+  {
+    auto data = interpreter.readOutputTensor(i);
+
+    // Output data is written in ${output_file}n
+    // (ex: Add.circle.output0)
+    writeDataToFile(std::string(output_file) + std::to_string(i), reinterpret_cast<char *>(data),
+                    interpreter.getOutputDataSizeByIndex(i));
+  }
+  return EXIT_SUCCESS;
+}
+
+int entry(int argc, char **argv);
+
+// Program entry point: forwards to entry().
+// NOTE Release builds (NDEBUG) print the exception message and return 255; debug builds
+//      do not catch internal exceptions to make it easy to check the stacktrace with a
+//      debugger
+int main(int argc, char **argv)
+{
+#ifdef NDEBUG
+  try
+  {
+    return entry(argc, argv);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << "ERROR: " << e.what() << std::endl;
+  }
+
+  return 255;
+#else  // NDEBUG
+  return entry(argc, argv);
+#endif // NDEBUG
+}
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <circle-generated/circle/schema_generated.h>
+
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+// Return the "REGISTER_KERNEL(<OPCODE>, <KernelClass>)" line for a builtin operator.
+//
+// An operator without a case below trips the assert in debug builds and throws
+// std::runtime_error when asserts are compiled out (NDEBUG), so control never flows
+// off the end of this non-void function.
+std::string get_register_kernel_str(const circle::BuiltinOperator builtin_operator)
+{
+// Expands to one case label returning the matching REGISTER_KERNEL line, so the opcode
+// in the label and the opcode in the string cannot drift apart.
+#define KERNEL_CASE(OPCODE, CLASS)     \
+  case circle::BuiltinOperator_##OPCODE: \
+    return "REGISTER_KERNEL(" #OPCODE ", " #CLASS ")";
+
+  switch (builtin_operator)
+  {
+    KERNEL_CASE(ADD, Add)
+    KERNEL_CASE(ARG_MAX, ArgMax)
+    KERNEL_CASE(AVERAGE_POOL_2D, AveragePool2D)
+    KERNEL_CASE(BATCH_TO_SPACE_ND, BatchToSpaceND)
+    KERNEL_CASE(CAST, Cast)
+    KERNEL_CASE(CONCATENATION, Concatenation)
+    KERNEL_CASE(CONV_2D, Conv2D)
+    KERNEL_CASE(DEPTH_TO_SPACE, DepthToSpace)
+    KERNEL_CASE(DEPTHWISE_CONV_2D, DepthwiseConv2D)
+    KERNEL_CASE(DEQUANTIZE, Dequantize)
+    KERNEL_CASE(DIV, Div)
+    KERNEL_CASE(ELU, Elu)
+    KERNEL_CASE(EXP, Exp)
+    KERNEL_CASE(EXPAND_DIMS, ExpandDims)
+    KERNEL_CASE(FILL, Fill)
+    KERNEL_CASE(FLOOR, Floor)
+    KERNEL_CASE(FLOOR_DIV, FloorDiv)
+    KERNEL_CASE(EQUAL, Equal)
+    KERNEL_CASE(FULLY_CONNECTED, FullyConnected)
+    KERNEL_CASE(GREATER, Greater)
+    KERNEL_CASE(GREATER_EQUAL, GreaterEqual)
+    KERNEL_CASE(INSTANCE_NORM, InstanceNorm)
+    KERNEL_CASE(L2_NORMALIZATION, L2Normalize)
+    KERNEL_CASE(L2_POOL_2D, L2Pool2D)
+    KERNEL_CASE(LEAKY_RELU, LeakyRelu)
+    KERNEL_CASE(LESS, Less)
+    KERNEL_CASE(LESS_EQUAL, LessEqual)
+    KERNEL_CASE(LOGICAL_AND, LogicalAnd)
+    KERNEL_CASE(LOGICAL_NOT, LogicalNot)
+    KERNEL_CASE(LOGICAL_OR, LogicalOr)
+    KERNEL_CASE(LOGISTIC, Logistic)
+    KERNEL_CASE(MAXIMUM, Maximum)
+    KERNEL_CASE(MAX_POOL_2D, MaxPool2D)
+    KERNEL_CASE(MINIMUM, Minimum)
+    KERNEL_CASE(MIRROR_PAD, MirrorPad)
+    KERNEL_CASE(MUL, Mul)
+    KERNEL_CASE(NEG, Neg)
+    KERNEL_CASE(NOT_EQUAL, NotEqual)
+    KERNEL_CASE(PAD, Pad)
+    KERNEL_CASE(PADV2, PadV2)
+    KERNEL_CASE(PRELU, PRelu)
+    KERNEL_CASE(QUANTIZE, Quantize)
+    KERNEL_CASE(RESHAPE, Reshape)
+    KERNEL_CASE(RESIZE_BILINEAR, ResizeBilinear)
+    KERNEL_CASE(RESIZE_NEAREST_NEIGHBOR, ResizeNearestNeighbor)
+    KERNEL_CASE(RSQRT, Rsqrt)
+    KERNEL_CASE(SHAPE, Shape)
+    KERNEL_CASE(SOFTMAX, Softmax)
+    KERNEL_CASE(SPACE_TO_BATCH_ND, SpaceToBatchND)
+    KERNEL_CASE(SPACE_TO_DEPTH, SpaceToDepth)
+    KERNEL_CASE(STRIDED_SLICE, StridedSlice)
+    KERNEL_CASE(SQRT, Sqrt)
+    KERNEL_CASE(SQUARE, Square)
+    KERNEL_CASE(SQUARED_DIFFERENCE, SquaredDifference)
+    KERNEL_CASE(SQUEEZE, Squeeze)
+    KERNEL_CASE(SUB, Sub)
+    KERNEL_CASE(SVDF, SVDF)
+    KERNEL_CASE(TANH, Tanh)
+    KERNEL_CASE(TRANSPOSE, Transpose)
+    KERNEL_CASE(TRANSPOSE_CONV, TransposeConv)
+    KERNEL_CASE(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)
+    default:
+      break;
+  }
+
+#undef KERNEL_CASE
+
+  // Debug builds stop here as before; release builds get a defined failure instead of
+  // undefined behaviour
+  assert(false && "Not supported kernel");
+  throw std::runtime_error("Not supported kernel");
+}
+
+// Reads the whole file at `path` in binary mode and returns its bytes.
+//
+// On failure (the file cannot be opened, its size cannot be determined, or the
+// read comes up short) an error is written to std::cerr and an empty vector is
+// returned, so the result is well defined even when assertions are compiled
+// out. Debug builds still stop on the assertion, exactly as before.
+// Callers must treat an empty result as "no usable model data".
+std::vector<char> loadFile(const std::string &path)
+{
+  std::ifstream file(path, std::ios::binary | std::ios::in);
+  if (!file.good())
+  {
+    std::cerr << "ERROR: Failed to open file '" << path << "'" << std::endl;
+    assert(false && "Failed to open file");
+    return {};
+  }
+
+  // Measure the file by seeking to its end. tellg() reports -1 on failure, which
+  // must not be turned into a (huge) buffer size.
+  file.seekg(0, std::ios::end);
+  const std::streamoff file_size = file.tellg();
+  file.seekg(0, std::ios::beg);
+  if (file_size < 0 || !file.good())
+  {
+    std::cerr << "ERROR: Failed to get size of file '" << path << "'" << std::endl;
+    assert(false && "Failed to read file");
+    return {};
+  }
+
+  // Size (not merely reserve) the buffer so that read() can fill it in place.
+  std::vector<char> data(static_cast<std::vector<char>::size_type>(file_size));
+
+  if (file_size > 0)
+  {
+    file.read(data.data(), file_size);
+    if (file.fail())
+    {
+      std::cerr << "ERROR: Failed to read file '" << path << "'" << std::endl;
+      assert(false && "Failed to read file");
+      return {};
+    }
+  }
+
+  return data;
+}
+
+// Writes one REGISTER_KERNEL(...) line to `os` for every distinct builtin
+// operator used by `model`, scanning all of its subgraphs. A line is written
+// the first time an operator is seen, so output follows first-use order.
+//
+// parse() and select_subgraph() report a bool; a false result is treated here
+// as a failure: it is reported on std::cerr and the model (or that subgraph)
+// is skipped instead of being read through an unset reader.
+void run(std::ofstream &os, const circle::Model *model)
+{
+  luci_interpreter::CircleReader reader;
+  if (!reader.parse(model))
+  {
+    std::cerr << "ERROR: Failed to parse circle model" << std::endl;
+    return;
+  }
+
+  // Operators already written; keeps the generated list free of duplicates.
+  std::set<circle::BuiltinOperator> emitted_operators;
+
+  const uint32_t subgraph_count = reader.num_subgraph();
+  for (uint32_t g = 0; g < subgraph_count; ++g)
+  {
+    if (!reader.select_subgraph(g))
+    {
+      std::cerr << "ERROR: Failed to select subgraph " << g << std::endl;
+      continue;
+    }
+
+    const auto ops = reader.operators();
+    for (uint32_t i = 0; i < ops.size(); ++i)
+    {
+      const auto builtin = reader.builtin_code(ops.at(i));
+
+      // insert().second is true only for an operator not seen before.
+      if (emitted_operators.insert(builtin).second)
+      {
+        os << get_register_kernel_str(builtin) << std::endl;
+      }
+    }
+  }
+}
+
+// Entry point. Expects exactly two arguments: the path of the circle model to
+// inspect and the path of the kernel list file to generate.
+//
+// Returns 0 on success and 255 on any error (bad usage, unreadable or invalid
+// model, output file that cannot be opened or written), so that a calling
+// build step can detect the failure. No check relies on assert(), which is
+// compiled out when NDEBUG is defined.
+int main(int argc, char **argv)
+{
+  if (argc != 3)
+  {
+    std::cerr << "Usage: " << (argc > 0 ? argv[0] : "generator")
+              << " <circle model path> <generated list path>" << std::endl;
+    return 255;
+  }
+
+  std::string model_file(argv[1]);
+  std::string generated_file_path(argv[2]);
+
+  std::vector<char> model_data = loadFile(model_file);
+  if (model_data.empty())
+  {
+    std::cerr << "ERROR: Failed to load circle '" << model_file << "'" << std::endl;
+    return 255;
+  }
+
+  // GetModel() only reinterprets the buffer, so verify the flatbuffer first;
+  // otherwise a truncated or foreign file would be read out of bounds.
+  flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(model_data.data()),
+                                 model_data.size());
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    std::cerr << "ERROR: Invalid circle model '" << model_file << "'" << std::endl;
+    return 255;
+  }
+
+  const circle::Model *circle_model = circle::GetModel(model_data.data());
+  if (circle_model == nullptr)
+  {
+    std::cerr << "ERROR: Failed to load circle '" << model_file << "'" << std::endl;
+    return 255;
+  }
+
+  // Open or create file
+  std::ofstream out;
+  out.open(generated_file_path);
+  if (!out.is_open())
+  {
+    std::cerr << "ERROR: Failed to open output file '" << generated_file_path << "'" << std::endl;
+    return 255;
+  }
+
+  run(out, circle_model);
+
+  // Surface write errors (e.g. disk full) instead of reporting success.
+  out.flush();
+  if (!out.good())
+  {
+    std::cerr << "ERROR: Failed to write output file '" << generated_file_path << "'" << std::endl;
+    return 255;
+  }
+
+  return 0;
+}
--- /dev/null
+# Locations of the public headers and of the sources of this component.
+set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
+set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
+
+# Platform abstraction layer directory: default to pal/mcu unless one was given.
+if (NOT LUCI_INTERPRETER_PAL_DIR)
+  set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/mcu")
+endif()
+
+# Kernel list to build: a caller-provided list wins, otherwise the list that
+# ships inside the selected PAL directory is used.
+if (LUCI_INTERPRETER_KERNELS_BUILD_LIST)
+  set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_KERNELS_BUILD_LIST})
+else()
+  set(KERNEL_REGISTER_FILE "${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst")
+endif()
+
+# Optional suffix; empty unless CUSTOM_LUCI_INTERPRETER_SUFFIX is defined.
+if (DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX)
+  set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX})
+else()
+  set(LUCI_INTERPRETER_SUFFIX "")
+endif()
+
+# Forward the DIS_QUANT / DIS_FLOAT switches to the compiler as -D definitions.
+foreach(feature_switch DIS_QUANT DIS_FLOAT)
+  if (${feature_switch})
+    add_definitions(-D${feature_switch})
+  endif()
+endforeach()
+
+# Build without exception support and optimise for size.
+add_compile_options(-fno-exceptions -Os)
+add_subdirectory(src)
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_INTERPRETER_H
+#define LUCI_INTERPRETER_INTERPRETER_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#ifdef USE_STATIC_ALLOC
+#include "luci_interpreter/InterpreterConfigure.h"
+#include "memory_managers/StaticMemoryManager.h"
+#else
+#include "memory_managers/SimpleMemoryManager.h"
+#endif // USE_STATIC_ALLOC
+
+#include "loader/ModuleLoader.h"
+#include <memory>
+
+namespace luci_interpreter
+{
+
+// Public entry point of the interpreter: it is constructed from raw model data
+// and exposes input/output tensor access plus interpret().
+// Only declarations are visible here; behaviour notes below are limited to what
+// the signatures show.
+class Interpreter
+{
+public:
+ // Construct default interpreter with dynamic allocations and with input allocations
+ explicit Interpreter(const char *model_data_raw);
+
+#ifdef USE_STATIC_ALLOC
+ // Construct interpreter with configurations
+ // (only available when USE_STATIC_ALLOC is defined)
+ explicit Interpreter(const char *model_data_raw, const InterpreterConfigure &configuration);
+#endif // USE_STATIC_ALLOC
+
+ ~Interpreter();
+
+ // Input access, addressed by input tensor index. The first overload takes the
+ // data to write and its size; the second returns a byte pointer.
+ // NOTE(review): presumably the returned pointer is the buffer the caller fills - confirm.
+ void allocateAndWriteInputTensor(int32_t input_tensor_index, const void *data, size_t data_size);
+ uint8_t *allocateInputTensor(int32_t input_tensor_index);
+
+ // Returns a byte pointer for the output tensor with the given index.
+ uint8_t *readOutputTensor(int32_t output_tensor_index);
+
+ // Data sizes of an input / output tensor, addressed by index.
+ // NOTE(review): presumably in bytes - confirm against the implementation.
+ int32_t getInputDataSizeByIndex(int32_t input_tensor_index);
+ int32_t getOutputDataSizeByIndex(int32_t output_tensor_index);
+
+ void interpret();
+
+private:
+ // _memory_manager should be before _runtime_module due to
+ // the order of deletion in the destructor (members are destroyed in reverse
+ // declaration order, so _runtime_module is destroyed first)
+ MemoryManager _memory_manager{};
+ RuntimeModule _runtime_module{};
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_INTERPRETER_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONERT_MICRO_INTERPRETER_CONFIGURE_H
+#define ONERT_MICRO_INTERPRETER_CONFIGURE_H
+
+namespace luci_interpreter
+{
+#ifdef USE_STATIC_ALLOC
+
+// Kind of memory manager selectable through
+// InterpreterConfigure::setMemoryManager().
+enum MemoryManagerType
+{
+ STATIC,
+ DYNAMIC
+};
+
+// Options handed to the Interpreter constructor that takes a configuration.
+// The memory-manager setters return *this so that calls can be chained.
+class InterpreterConfigure
+{
+public:
+  // Whether input tensors are to be allocated (defaults to true).
+  bool getAllocateInputValue() const { return _allocate_input; }
+  void setAllocateInputValue(bool allocate_input) { _allocate_input = allocate_input; }
+
+  // True once the static memory manager has been selected (defaults to false).
+  bool isStaticManager() const { return _use_static_manager; }
+
+  // Selects the memory manager kind; any other value trips the assertion and
+  // leaves the current selection untouched.
+  InterpreterConfigure &setMemoryManager(MemoryManagerType mm_type)
+  {
+    if (mm_type == MemoryManagerType::STATIC)
+    {
+      _use_static_manager = true;
+    }
+    else if (mm_type == MemoryManagerType::DYNAMIC)
+    {
+      _use_static_manager = false;
+    }
+    else
+    {
+      assert(false);
+    }
+    return *this;
+  }
+
+  // TODO: remove this method
+  // Stores the three buffer sizes; asserts that the static manager was selected.
+  InterpreterConfigure &configStaticMemoryManager(uint32_t input_buf_size, uint32_t temp_buf_size,
+                                                  uint32_t output_buf_size)
+  {
+    assert(_use_static_manager);
+    _output_buf_size = output_buf_size;
+    _temp_buf_size = temp_buf_size;
+    _input_buf_size = input_buf_size;
+    return *this;
+  }
+
+private:
+  bool _use_static_manager = false;
+  bool _allocate_input = true;
+
+public:
+  // TODO: remove it and read these values from circle file
+  uint32_t _input_buf_size = 0;
+  uint32_t _temp_buf_size = 0;
+  uint32_t _output_buf_size = 0;
+};
+
+#endif
+
+} // namespace luci_interpreter
+
+#endif // ONERT_MICRO_INTERPRETER_CONFIGURE_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_DATATYPE_H
+#define LUCI_INTERPRETER_CORE_DATATYPE_H
+
+#include <cstdint>
+#include <cstddef>
+#include <string>
+#include <cassert>
+
+namespace luci_interpreter
+{
+// TODO check whether this enum can be removed
+/**
+ * @brief "scalar" value type
+ *
+ * Element types known to the interpreter. The C++ storage type of each
+ * enumerator (except Unknown) is given by the matching DataTypeImpl
+ * specialization in this header.
+ */
+enum class DataType
+{
+ Unknown, // Unknown type (serves as a default value)
+
+ U8, // 8-bit unsigned integer
+ U16, // 16-bit unsigned integer
+ U32, // 32-bit unsigned integer
+ U64, // 64-bit unsigned integer
+
+ S8, // 8-bit signed integer
+ S16, // 16-bit signed integer
+ S32, // 32-bit signed integer
+ S64, // 64-bit signed integer
+
+ FLOAT16, // IEEE 16-bit floating-point
+ FLOAT32, // IEEE 32-bit floating-point
+ FLOAT64, // IEEE 64-bit floating-point
+
+ // WARNING the size of Bool may vary for NN frameworks
+ // TODO we need to find a way to resolve this issue
+ BOOL, // Boolean
+};
+
+/**
+ * @brief C++ scalar type corresponding to each DataType
+ *
+ * The primary template deliberately declares no `Type` member: only the
+ * explicit specializations below provide one, so requesting the type of an
+ * enumerator without a specialization (e.g. DataType::Unknown) fails to compile.
+ */
+template <DataType DT> struct DataTypeImpl
+{
+ // using Type = ...
+};
+
+// TODO Support other enum values
+template <> struct DataTypeImpl<DataType::S8>
+{
+ // Use C++ int8_t type for 8bit integer
+ using Type = int8_t;
+};
+
+template <> struct DataTypeImpl<DataType::U8>
+{
+ // Use C++ uint8_t type for unsigned 8bit integer
+ using Type = uint8_t;
+};
+
+template <> struct DataTypeImpl<DataType::S16>
+{
+ // Use C++ int16_t type for 16bit integer
+ using Type = int16_t;
+};
+
+template <> struct DataTypeImpl<DataType::U16>
+{
+ // Use C++ uint16_t type for unsigned 16bit integer
+ using Type = uint16_t;
+};
+
+template <> struct DataTypeImpl<DataType::S32>
+{
+ // Use C++ int32_t type for 32bit integer
+ using Type = int32_t;
+};
+
+template <> struct DataTypeImpl<DataType::U32>
+{
+ // Use C++ uint32_t type for unsigned 32bit integer
+ using Type = uint32_t;
+};
+
+template <> struct DataTypeImpl<DataType::S64>
+{
+ // Use C++ int64_t type for 64bit integer
+ using Type = int64_t;
+};
+
+template <> struct DataTypeImpl<DataType::U64>
+{
+ // Use C++ uint64_t type for unsigned 64bit integer
+ using Type = uint64_t;
+};
+
+template <> struct DataTypeImpl<DataType::FLOAT16>
+{
+ // float16 type with 16bit value, encoded with help of FP16 library
+ // https://github.com/Maratyszcza/FP16/
+ // (stored as raw 16 bits, hence the integer storage type)
+ using Type = uint16_t;
+};
+
+template <> struct DataTypeImpl<DataType::FLOAT32>
+{
+ // Use C++ float type for IEEE 32-bit floating-point numbers
+ using Type = float;
+};
+
+template <> struct DataTypeImpl<DataType::FLOAT64>
+{
+ // Use C++ double type for IEEE 64-bit floating-point numbers
+ using Type = double;
+};
+
+// NOTE DataTypeImpl for BOOL is subject to change
+template <> struct DataTypeImpl<DataType::BOOL>
+{
+ // Use C++ uint8_t type for bool
+ using Type = uint8_t;
+};
+
+/**
+ * @brief Returns the storage size, in bytes, of one element of the data type.
+ * @note For a compile-time size use `sizeof(typename DataTypeImpl<DT>::Type)`.
+ * @note A data type without a DataTypeImpl mapping trips the assertion; when
+ *       assertions are disabled UINT32_MAX is returned instead.
+ */
+inline uint32_t size(DataType data_type)
+{
+  // Value reported for an unsupported type (keeps every path returning a value).
+  uint32_t element_bytes = UINT32_MAX;
+
+  switch (data_type)
+  {
+    case DataType::S8:
+      element_bytes = sizeof(DataTypeImpl<DataType::S8>::Type);
+      break;
+    case DataType::U8:
+      element_bytes = sizeof(DataTypeImpl<DataType::U8>::Type);
+      break;
+    case DataType::S16:
+      element_bytes = sizeof(DataTypeImpl<DataType::S16>::Type);
+      break;
+    case DataType::U16:
+      element_bytes = sizeof(DataTypeImpl<DataType::U16>::Type);
+      break;
+    case DataType::S32:
+      element_bytes = sizeof(DataTypeImpl<DataType::S32>::Type);
+      break;
+    case DataType::U32:
+      element_bytes = sizeof(DataTypeImpl<DataType::U32>::Type);
+      break;
+    case DataType::S64:
+      element_bytes = sizeof(DataTypeImpl<DataType::S64>::Type);
+      break;
+    case DataType::U64:
+      element_bytes = sizeof(DataTypeImpl<DataType::U64>::Type);
+      break;
+    case DataType::FLOAT16:
+      element_bytes = sizeof(DataTypeImpl<DataType::FLOAT16>::Type);
+      break;
+    case DataType::FLOAT32:
+      element_bytes = sizeof(DataTypeImpl<DataType::FLOAT32>::Type);
+      break;
+    case DataType::FLOAT64:
+      element_bytes = sizeof(DataTypeImpl<DataType::FLOAT64>::Type);
+      break;
+    case DataType::BOOL:
+      element_bytes = sizeof(DataTypeImpl<DataType::BOOL>::Type);
+      break;
+    default:
+      // TODO Support remaining data types.
+      assert(false);
+      break;
+  }
+
+  return element_bytes;
+}
+
+// Same value as size(data_type), returned as size_t.
+inline size_t getDataTypeSize(DataType data_type)
+{
+  const uint32_t element_bytes = size(data_type);
+  return element_bytes;
+}
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_DATATYPE_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MICRO_INTERPRETER_PARAMS_TYPE_H__
+#define __LUCI_MICRO_INTERPRETER_PARAMS_TYPE_H__
+
+#include <vector>
+#include <cstdint>
+#include <utility>
+
+namespace luci_interpreter
+{
+// TODO check whether this enum can be removed
+// Activation function kinds used on the interpreter side; this is the return
+// type of luci_actfunc(circle::ActivationFunctionType) declared in
+// CircleMicroReader.h.
+enum class FusedActFunc
+{
+ UNDEFINED, // This is not defined by TFLite or Circle. This was added to
+ // prevent programming error.
+ NONE,
+ RELU,
+ RELU_N1_TO_1,
+ RELU6,
+ TANH,
+ SIGN_BIT
+};
+
+// Padding kinds used on the interpreter side; this is the return type of
+// luci_padding(circle::Padding) declared in CircleMicroReader.h.
+enum class Padding
+{
+ UNDEFINED, // This is not defined by TFLite. This was added to prevent programming error.
+
+ SAME,
+ VALID,
+};
+
+// Mirror-pad modes used on the interpreter side; this is the return type of
+// luci_mirrorpad_mode(circle::MirrorPadMode) declared in CircleMicroReader.h.
+enum class MirrorPadMode
+{
+ UNDEFINED, // This is not defined by Circle. This was added to prevent programming error.
+
+ REFLECT,
+ SYMMETRIC,
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_MICRO_INTERPRETER_PARAMS_TYPE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_TENSOR_H
+#define LUCI_INTERPRETER_CORE_TENSOR_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+// Stateless helpers that read properties directly from a circle::Tensor
+// flatbuffer table. Every member is static; the class holds no data.
+//
+// The quantization accessors share one error policy: missing data trips an
+// assertion in debug builds and yields a neutral value (0 or an empty vector)
+// when assertions are compiled out, instead of dereferencing a null pointer.
+class Tensor
+{
+public:
+#ifndef DIS_QUANT
+  // Returns the first quantization scale of the tensor, or 0 when the tensor
+  // has no quantization parameters or no scale values.
+  static float scale(const circle::Tensor *circle_tensor)
+  {
+    const auto *quant_params = circle_tensor->quantization();
+    if (quant_params == nullptr)
+    {
+      assert(false && "There is no quantization params");
+      return 0;
+    }
+
+    const auto *scale_values = quant_params->scale();
+    if (scale_values == nullptr || scale_values->size() == 0)
+    {
+      assert(false && "There is no scale value");
+      return 0;
+    }
+
+    return *scale_values->cbegin();
+  }
+
+  // Returns the first quantization zero point of the tensor, or 0 when the
+  // tensor has no quantization parameters or no zero point values.
+  static int32_t zero_point(const circle::Tensor *circle_tensor)
+  {
+    const auto *quant_params = circle_tensor->quantization();
+    if (quant_params == nullptr)
+    {
+      assert(false && "There is no quantization params");
+      return 0;
+    }
+
+    const auto *zero_point_values = quant_params->zero_point();
+    if (zero_point_values == nullptr || zero_point_values->size() == 0)
+    {
+      assert(false && "There is no zero point value");
+      return 0;
+    }
+
+    return *zero_point_values->cbegin();
+  }
+
+  // Returns a copy of all quantization scales, or an empty vector when the
+  // tensor has no quantization parameters or no scale vector.
+  static const std::vector<float> scales(const circle::Tensor *circle_tensor)
+  {
+    const auto *quant_params = circle_tensor->quantization();
+    if (quant_params == nullptr)
+    {
+      assert(false && "There is no quantization params");
+      return {};
+    }
+
+    const auto *scale_values = quant_params->scale();
+    if (scale_values == nullptr)
+    {
+      assert(false && "There is no scale value");
+      return {};
+    }
+
+    std::vector<float> scales(scale_values->cbegin(), scale_values->cend());
+
+    return scales;
+  }
+
+  // Returns a copy of all quantization zero points, or an empty vector when
+  // the tensor has no quantization parameters or no zero point vector.
+  static const std::vector<int32_t> zero_points(const circle::Tensor *circle_tensor)
+  {
+    const auto *quant_params = circle_tensor->quantization();
+    if (quant_params == nullptr)
+    {
+      assert(false && "There is no quantization params");
+      return {};
+    }
+
+    const auto *zero_point_values = quant_params->zero_point();
+    if (zero_point_values == nullptr)
+    {
+      assert(false && "There is no zero point value");
+      return {};
+    }
+
+    std::vector<int32_t> zero_points(zero_point_values->cbegin(), zero_point_values->cend());
+
+    return zero_points;
+  }
+
+  // Returns the quantized dimension stored in the quantization parameters, or
+  // 0 when the tensor has none.
+  static int32_t quantized_dimension(const circle::Tensor *circle_tensor)
+  {
+    const auto *quant_params = circle_tensor->quantization();
+    if (quant_params == nullptr)
+    {
+      assert(false && "There is no quantization params");
+      return 0;
+    }
+    return quant_params->quantized_dimension();
+  }
+#endif
+
+  // Element type of the tensor, converted with luci_datatype().
+  static DataType element_type(const circle::Tensor *circle_tensor)
+  {
+    return luci_datatype(circle_tensor->type());
+  }
+
+  // Number of entries in the tensor's shape vector.
+  static int num_dims(const circle::Tensor *circle_tensor)
+  {
+    // TODO check removing of wrap
+    const auto dims = wrap(circle_tensor->shape());
+    return dims.size();
+  }
+
+  // Extent of dimension `i`; `i` must satisfy 0 <= i < num_dims().
+  static int32_t dim(const circle::Tensor *circle_tensor, int i)
+  {
+    // TODO check removing of wrap
+    assert(i >= 0);
+    const auto dims = wrap(circle_tensor->shape());
+    // Compare as unsigned: size() is uint32_t and i is known to be non-negative.
+    assert(static_cast<uint32_t>(i) < dims.size());
+
+    return dims[i];
+  }
+
+  // Product of all shape entries (1 when the shape has no entries).
+  static int32_t num_elements(const circle::Tensor *circle_tensor)
+  {
+    int32_t result = 1;
+    const auto dims = wrap(circle_tensor->shape());
+    for (const int32_t dim : dims)
+    {
+      result *= dim;
+    }
+    return result;
+  }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_TENSOR_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MICRO_INTERPRETER_MICRO_READER_H__
+#define __LUCI_MICRO_INTERPRETER_MICRO_READER_H__
+
+#include "luci_interpreter/core/ParamsType.h"
+#include "luci_interpreter/core/DataType.h"
+
+#include <circle-generated/circle/schema_generated.h>
+
+#include <map>
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+#ifdef USE_STATIC_ALLOC
+namespace
+{
+
+using ExecutionPlanTable = std::map<uint32_t, std::vector<uint32_t>>;
+
+// Reads the four bytes starting at `idx` as a little-endian unsigned 32-bit
+// value (byte `idx` is the least significant one).
+//
+// `buffer` must be a container of uint8_t that provides at(); bounds handling
+// is whatever that at() implements.
+template <typename VECTORTYPE> uint32_t read_u32(const VECTORTYPE &buffer, uint32_t idx)
+{
+  static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
+  uint32_t val = 0;
+  for (uint32_t byte = 0; byte < sizeof(uint32_t); ++byte)
+  {
+    // Widen to uint32_t before shifting: a uint8_t is otherwise promoted to
+    // int, and shifting a byte >= 0x80 left by 24 overflows a signed int.
+    val |= static_cast<uint32_t>(buffer.at(idx + byte)) << (byte * 8);
+  }
+  return val;
+}
+
+} // namespace
+
+namespace read_metadata
+{
+
+// Decodes a serialized execution plan into a table.
+//
+// Layout read by this function (all fields are 32-bit values read with
+// read_u32): an entry count, then per entry an id, a size, a position and
+// (size - 1) further values. Each entry is stored in the table under its
+// position, mapped to the vector of those further values.
+//
+// Malformed input is reported through assertions only, as before. The bounds
+// checks are written with subtraction/division so that they cannot wrap on
+// targets where size_t is 32 bits wide.
+template <typename VECTORTYPE>
+ExecutionPlanTable decode_execution_plan(const VECTORTYPE &execution_plan_data)
+{
+  static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
+  ExecutionPlanTable execution_plan_table;
+  uint32_t idx = 0;
+
+  if (execution_plan_data.size() < sizeof(uint32_t))
+    assert(false && "Op table decode error : invalid entry number");
+
+  uint32_t entry_number = read_u32(execution_plan_data, idx);
+  idx += sizeof(uint32_t);
+
+  while (idx < execution_plan_data.size())
+  {
+    // The loop condition guarantees idx < size(), so the subtraction is safe.
+    if (execution_plan_data.size() - idx < 2 * sizeof(uint32_t))
+      assert(false && "Op table decode error : invalid entry item");
+
+    // The entry id is not used by the table; the read is kept only for the
+    // bounds-checked access it performs.
+    (void)read_u32(execution_plan_data, idx);
+    idx += sizeof(uint32_t);
+
+    uint32_t size = read_u32(execution_plan_data, idx);
+
+    if (size == 0)
+      assert(false && "Op table decode error : empty execution plan entry");
+
+    idx += sizeof(uint32_t);
+
+    // Same condition as `idx + sizeof(uint32_t) * size > size()`, without the
+    // multiplication and addition that could overflow for a large `size`.
+    if (size > (execution_plan_data.size() - idx) / sizeof(uint32_t))
+      assert(false && "Source table decode error : invalid entry data");
+
+    std::vector<uint32_t> execution_plan_vector;
+    uint32_t position = read_u32(execution_plan_data, idx);
+    idx += sizeof(uint32_t);
+
+    // The position consumed the first of the `size` values; read the rest.
+    for (uint32_t j = 1; j < size; ++j)
+    {
+      uint32_t execution_plan_inform = read_u32(execution_plan_data, idx);
+      idx += sizeof(uint32_t);
+
+      execution_plan_vector.push_back(execution_plan_inform);
+    }
+
+    if (!execution_plan_table.insert({position, execution_plan_vector}).second)
+      assert(false && "Op table decode error : duplicated origin ID");
+  }
+
+  if (idx != execution_plan_data.size())
+    assert(false && "Op table decode error : data size invalid");
+
+  if (execution_plan_table.size() != entry_number)
+    assert(false && "Op table decode error : entry number invalid");
+
+  return execution_plan_table;
+}
+
+} // namespace read_metadata
+#endif
+
+// Conversions from circle schema enums to the interpreter-side enums declared
+// in DataType.h and ParamsType.h. Only the declarations live in this header.
+DataType luci_datatype(circle::TensorType type);
+FusedActFunc luci_actfunc(circle::ActivationFunctionType type);
+Padding luci_padding(circle::Padding padding);
+MirrorPadMode luci_mirrorpad_mode(circle::MirrorPadMode mode);
+
+/**
+ * @brief Wrapper to use flatbuffers::Vector pointer as std::vector entity
+ *
+ * Holds only the raw pointer passed to the constructor; it does not own or
+ * copy the vector. Member functions are declared here and defined elsewhere.
+ * NOTE(review): null() suggests a null pointer is an accepted state - confirm
+ * how size()/begin()/at() behave in that case.
+ */
+template <typename T> class VectorWrapper
+{
+public:
+ explicit VectorWrapper(const flatbuffers::Vector<T> *ptr);
+
+ const T *data() const;
+ uint32_t size() const;
+
+ // Iteration uses the const iterator type of the wrapped flatbuffers vector.
+ using iterator = typename flatbuffers::Vector<T>::const_iterator;
+ iterator begin() const;
+ iterator end() const;
+
+ // Element access returns the flatbuffers return_type for T.
+ using value_type = typename flatbuffers::Vector<T>::return_type;
+ value_type at(uint32_t i) const;
+ value_type operator[](uint32_t i) const;
+
+ bool null() const;
+ bool empty() const;
+
+private:
+ // Wrapped vector; not owned.
+ const flatbuffers::Vector<T> *_vector;
+};
+
+// Convenience factory: builds a VectorWrapper around `vec`, letting T be
+// deduced from the argument.
+template <typename T> VectorWrapper<T> wrap(const flatbuffers::Vector<T> *vec)
+{
+  VectorWrapper<T> wrapped(vec);
+  return wrapped;
+}
+
+/**
+ * @brief Loads Circle file and provides helpers to access attributes
+ *
+ * The reader keeps two raw pointers, both initially null: the model and the
+ * currently selected subgraph. The inline accessors below dereference them
+ * without a check, so model-level accessors need a model and subgraph-level
+ * accessors need a selected subgraph.
+ * NOTE(review): presumably parse() sets the model and select_subgraph() sets
+ * the current subgraph - confirm against their definitions.
+ */
+class CircleReader
+{
+public:
+ using CircleBuffers = VectorWrapper<flatbuffers::Offset<circle::Buffer>>;
+ using CircleTensors = VectorWrapper<flatbuffers::Offset<circle::Tensor>>;
+ using CircleOperators = VectorWrapper<flatbuffers::Offset<circle::Operator>>;
+ using CircleOperatorCodes = VectorWrapper<flatbuffers::Offset<circle::OperatorCode>>;
+ using CircleMetadataSet = VectorWrapper<flatbuffers::Offset<circle::Metadata>>;
+
+public:
+ CircleReader() = default;
+
+public: // direct API
+ // Model-level accessors (read through _model).
+ CircleOperatorCodes opcodes() const { return wrap(_model->operator_codes()); }
+ CircleBuffers buffers() const { return wrap(_model->buffers()); }
+ // Subgraph-level accessors (read through _current_subgraph).
+ CircleTensors tensors() const { return wrap(_current_subgraph->tensors()); }
+ CircleOperators operators() const { return wrap(_current_subgraph->operators()); }
+ VectorWrapper<int32_t> inputs() const { return wrap(_current_subgraph->inputs()); }
+ VectorWrapper<int32_t> outputs() const { return wrap(_current_subgraph->outputs()); }
+ circle::DataFormat data_format() const { return _current_subgraph->data_format(); }
+ // Model-level accessor (reads through _model).
+ CircleMetadataSet metadata() const { return wrap(_model->metadata()); }
+
+ // Number of subgraphs in the model.
+ uint32_t num_subgraph() const { return wrap(_model->subgraphs()).size(); }
+ // Builtin operator code of `op`; defined outside this header.
+ circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
+
+public:
+ // Both return bool; NOTE(review): presumably false signals failure - confirm.
+ bool parse(const circle::Model *model);
+ bool select_subgraph(uint32_t subgraph);
+
+private:
+ // Not owned; the model buffer must outlive the reader.
+ const circle::Model *_model{nullptr};
+ const circle::SubGraph *_current_subgraph{nullptr};
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_MICRO_INTERPRETER_MICRO_READER_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_MICRO_CIRCLE_MICRO_READER_HELPER_H__
+#define __LUCI_MICRO_CIRCLE_MICRO_READER_HELPER_H__
+
+#include <circle-generated/circle/schema_generated.h>
+
+#include <vector>
+
+namespace circle
+{
+
+// Helpers operating on raw circle schema objects. Only the declarations are in
+// this header; the definitions live elsewhere.
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode);
+bool is_valid(const ::circle::OperatorCode *opcode);
+bool is_custom(const ::circle::OperatorCode *opcode);
+const char *tensor_type(const ::circle::Tensor *tensor);
+
+// Copies the elements of a flatbuffers vector into a std::vector.
+//
+// A null `flat_array` trips the assertion in debug builds and yields an empty
+// vector when assertions are compiled out, instead of dereferencing null.
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+  if (flat_array == nullptr)
+  {
+    assert(false && "flat array is nullptr");
+    return {};
+  }
+
+  // size() replaces the deprecated Length(); read it once for both uses.
+  const uint32_t count = flat_array->size();
+
+  std::vector<T> ret(count);
+  for (uint32_t i = 0; i < count; i++)
+  {
+    ret[i] = flat_array->Get(i);
+  }
+  return ret;
+}
+
+} // namespace circle
+
+#endif // __LUCI_MICRO_CIRCLE_MICRO_READER_HELPER_H__
--- /dev/null
+REGISTER_KERNEL(ADD, Add)
+REGISTER_KERNEL(ARG_MAX, ArgMax)
+REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)
+REGISTER_KERNEL(BATCH_TO_SPACE_ND, BatchToSpaceND)
+REGISTER_KERNEL(CAST, Cast)
+REGISTER_KERNEL(CONCATENATION, Concatenation)
+REGISTER_KERNEL(CONV_2D, Conv2D)
+REGISTER_KERNEL(DEPTH_TO_SPACE, DepthToSpace)
+REGISTER_KERNEL(DEPTHWISE_CONV_2D, DepthwiseConv2D)
+REGISTER_KERNEL(DEQUANTIZE, Dequantize)
+REGISTER_KERNEL(DIV, Div)
+REGISTER_KERNEL(ELU, Elu)
+REGISTER_KERNEL(EXP, Exp)
+REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)
+REGISTER_KERNEL(FILL, Fill)
+REGISTER_KERNEL(FLOOR, Floor)
+REGISTER_KERNEL(FLOOR_DIV, FloorDiv)
+REGISTER_KERNEL(EQUAL, Equal)
+REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)
+REGISTER_KERNEL(GREATER, Greater)
+REGISTER_KERNEL(GREATER_EQUAL, GreaterEqual)
+REGISTER_KERNEL(INSTANCE_NORM, InstanceNorm)
+REGISTER_KERNEL(L2_NORMALIZATION, L2Normalize)
+REGISTER_KERNEL(L2_POOL_2D, L2Pool2D)
+REGISTER_KERNEL(LEAKY_RELU, LeakyRelu)
+REGISTER_KERNEL(LESS, Less)
+REGISTER_KERNEL(LESS_EQUAL, LessEqual)
+REGISTER_KERNEL(LOGICAL_AND, LogicalAnd)
+REGISTER_KERNEL(LOGICAL_NOT, LogicalNot)
+REGISTER_KERNEL(LOGICAL_OR, LogicalOr)
+REGISTER_KERNEL(LOGISTIC, Logistic)
+REGISTER_KERNEL(MAXIMUM, Maximum)
+REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)
+REGISTER_KERNEL(MINIMUM, Minimum)
+REGISTER_KERNEL(MIRROR_PAD, MirrorPad)
+REGISTER_KERNEL(MUL, Mul)
+REGISTER_KERNEL(NEG, Neg)
+REGISTER_KERNEL(NOT_EQUAL, NotEqual)
+REGISTER_KERNEL(PAD, Pad)
+REGISTER_KERNEL(PADV2, PadV2)
+REGISTER_KERNEL(PRELU, PRelu)
+REGISTER_KERNEL(QUANTIZE, Quantize)
+REGISTER_KERNEL(RESHAPE, Reshape)
+REGISTER_KERNEL(RESIZE_BILINEAR, ResizeBilinear)
+REGISTER_KERNEL(RESIZE_NEAREST_NEIGHBOR, ResizeNearestNeighbor)
+REGISTER_KERNEL(RSQRT, Rsqrt)
+REGISTER_KERNEL(SHAPE, Shape)
+REGISTER_KERNEL(SOFTMAX, Softmax)
+REGISTER_KERNEL(SPACE_TO_BATCH_ND, SpaceToBatchND)
+REGISTER_KERNEL(SPACE_TO_DEPTH, SpaceToDepth)
+REGISTER_KERNEL(STRIDED_SLICE, StridedSlice)
+REGISTER_KERNEL(SQRT, Sqrt)
+REGISTER_KERNEL(SQUARE, Square)
+REGISTER_KERNEL(SQUARED_DIFFERENCE, SquaredDifference)
+REGISTER_KERNEL(SQUEEZE, Squeeze)
+REGISTER_KERNEL(SUB, Sub)
+REGISTER_KERNEL(SVDF, SVDF)
+REGISTER_KERNEL(TANH, Tanh)
+REGISTER_KERNEL(TRANSPOSE, Transpose)
+REGISTER_KERNEL(TRANSPOSE_CONV, TransposeConv)
+REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+// PAL entry point for BatchToSpaceND: forwards every argument unchanged to the
+// TFLite reference implementation.
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  namespace reference = tflite::reference_ops;
+
+  reference::BatchToSpaceND(unextended_input1_shape, input1_data,       // input
+                            unextended_input2_shape, block_shape_data,  // block shape
+                            unextended_input3_shape, crops_data,        // crops
+                            unextended_output_shape, output_data);      // output
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+// Float convolution: forwards to the TFLite reference kernel.
+// The scratchpad arguments are accepted for interface compatibility but are
+// not used; an empty shape and a null pointer are passed in their place.
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const float *input_data, const tflite::RuntimeShape &filter_shape,
+                        const float *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const float *bias_data, const tflite::RuntimeShape &output_shape,
+                        float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        float *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data,
+                              tflite::RuntimeShape(), nullptr);
+}
+
+// uint8 quantized convolution: forwards to the TFLite reference kernel.
+// The scratchpad shape and buffer are handed through in the reference kernel's
+// two trailing shape/buffer arguments, followed by a null final argument.
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+                        const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+                        uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        uint8 *scratchpad_data)
+{
+  // No (void) casts here: both scratchpad arguments are used in the call below.
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+                              scratchpad_data, nullptr);
+}
+
+// Per-channel quantized int8 Conv2D.
+//
+// If a scratchpad buffer is supplied, the convolution is dispatched to CMSIS-NN
+// (arm_convolve_wrapper_s8) with the scratchpad as its working buffer. Without a scratchpad
+// the TFLite reference integer kernel is used instead.
+//
+// mult / shifts are handed to both back ends as the per-channel multiplier and shift arrays.
+// The CMSIS-NN path asserts a 1x1 dilation and rank-4 input, filter and output shapes.
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+                                  const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+                                  const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+                                  const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+                                  const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+                                  int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                                  int8 *scratchpad_data)
+{
+  if (scratchpad_data)
+  {
+    cmsis_nn_conv_params conv_params;
+    conv_params.dilation.h = params.dilation_height_factor;
+    conv_params.dilation.w = params.dilation_width_factor;
+
+    assert(conv_params.dilation.h == 1);
+    assert(conv_params.dilation.w == 1);
+
+    conv_params.input_offset = params.input_offset;
+    conv_params.output_offset = params.output_offset;
+    conv_params.stride.h = params.stride_height;
+    conv_params.stride.w = params.stride_width;
+    conv_params.padding.h = params.padding_values.height;
+    conv_params.padding.w = params.padding_values.width;
+    conv_params.activation.min = params.quantized_activation_min;
+    conv_params.activation.max = params.quantized_activation_max;
+
+    // The CMSIS-NN struct holds non-const pointers, hence the const_cast.
+    cmsis_nn_per_channel_quant_params quant_params;
+    quant_params.multiplier = const_cast<int32_t *>(mult);
+    quant_params.shift = const_cast<int32_t *>(shifts);
+
+    assert(conv_params.activation.min <= conv_params.activation.max);
+    assert(input_shape.DimensionsCount() == 4);
+    assert(filter_shape.DimensionsCount() == 4);
+    assert(output_shape.DimensionsCount() == 4);
+    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+    const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+    if (bias_data)
+    {
+      assert(bias_shape.FlatSize() == output_depth);
+    }
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batch_size;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_depth;
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = output_depth;
+    filter_dims.h = filter_shape.Dims(1);
+    filter_dims.w = filter_shape.Dims(2);
+    filter_dims.c = input_depth;
+
+    cmsis_nn_dims bias_dims;
+    bias_dims.n = 1;
+    bias_dims.h = 1;
+    bias_dims.w = 1;
+    bias_dims.c = output_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batch_size;
+    output_dims.h = output_shape.Dims(1);
+    output_dims.w = output_shape.Dims(2);
+    output_dims.c = output_depth;
+
+    // The scratchpad tensor is one-dimensional; its only dimension is used as the buffer size.
+    cmsis_nn_context ctx;
+    ctx.buf = scratchpad_data;
+    ctx.size = scratchpad_shape.Dims(0);
+
+    auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
+                                       &filter_dims, filter_data, &bias_dims, bias_data,
+                                       &output_dims, output_data);
+    assert(res == ARM_MATH_SUCCESS);
+    // res is only read by the assert above; keep NDEBUG builds free of unused warnings.
+    (void)res;
+  }
+  else
+  {
+    tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+                                                  filter_shape, filter_data, bias_shape, bias_data,
+                                                  output_shape, output_data);
+  }
+}
+
+// Sizes the Conv2D scratchpad tensor for the CMSIS-NN int8 kernel.
+//
+// For S8 input with a 1x1 dilation the scratchpad is resized to the value reported by
+// arm_convolve_wrapper_s8_get_buffer_size. For every other configuration the scratchpad is
+// marked non-allocatable.
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::ConvParams &params,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  // Value-initialised so that fields not assigned below (the activation range) are not
+  // handed to CMSIS-NN with indeterminate values.
+  cmsis_nn_conv_params conv_params{};
+  conv_params.dilation.h = params.dilation_height_factor;
+  conv_params.dilation.w = params.dilation_width_factor;
+
+  const bool use_cmsis_nn = input_data_type == luci_interpreter::DataType::S8 &&
+                            conv_params.dilation.h == 1 && conv_params.dilation.w == 1;
+  if (!use_cmsis_nn)
+  {
+    scratchpad->set_allocatable(false);
+    return;
+  }
+
+  const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+  const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+  const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+
+  conv_params.input_offset = params.input_offset;
+  conv_params.output_offset = params.output_offset;
+  conv_params.stride.h = params.stride_height;
+  conv_params.stride.w = params.stride_width;
+  conv_params.padding.h = params.padding_values.height;
+  conv_params.padding.w = params.padding_values.width;
+
+  cmsis_nn_dims input_dims;
+  input_dims.n = batches;
+  input_dims.h = input_shape.Dims(1);
+  input_dims.w = input_shape.Dims(2);
+  input_dims.c = input_depth;
+
+  cmsis_nn_dims filter_dims;
+  filter_dims.n = output_depth;
+  filter_dims.h = filter_shape.Dims(1);
+  filter_dims.w = filter_shape.Dims(2);
+  filter_dims.c = input_depth;
+
+  cmsis_nn_dims output_dims;
+  output_dims.n = batches;
+  output_dims.h = output_shape.Dims(1);
+  output_dims.w = output_shape.Dims(2);
+  output_dims.c = output_depth;
+
+  const int32_t buf_size =
+    arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims, &filter_dims, &output_dims);
+
+  luci_interpreter::Shape scratchpad_shape{buf_size};
+  scratchpad->resize(scratchpad_shape);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+// Generic per-channel depthwise convolution.
+//
+// Only the int8_t specialization is implemented; instantiating this primary template for any
+// other element type trips the assertion below (and does nothing when assertions are
+// compiled out).
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+                        const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+                        const T *input_data, const tflite::RuntimeShape &filter_shape,
+                        const T *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+                        T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        T *scratchpad_data)
+{
+  // MARK: At this moment this operation is not supported
+  assert(false && "DepthwiseConvPerChannel NYI");
+
+  // Silence unused-parameter warnings.
+  (void)params;
+  (void)output_multiplier;
+  (void)output_shift;
+  (void)input_shape;
+  (void)input_data;
+  (void)filter_shape;
+  (void)filter_data;
+  (void)bias_shape;
+  (void)bias_data;
+  (void)output_shape;
+  (void)output_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+}
+
+// Per-channel quantized int8 depthwise convolution.
+//
+// If a scratchpad buffer is supplied, the operation is dispatched to CMSIS-NN
+// (arm_depthwise_conv_wrapper_s8) with the scratchpad as its working buffer. Without a
+// scratchpad the TFLite reference integer kernel is used instead.
+//
+// output_multiplier / output_shift are the per-channel requantization arrays. Both back ends
+// receive these same arrays. The CMSIS-NN path asserts a 1x1 dilation.
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+  const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+  const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+  const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+  const tflite::RuntimeShape &output_shape, int8_t *output_data,
+  const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+  if (scratchpad_data)
+  {
+    cmsis_nn_dw_conv_params dw_conv_params;
+    dw_conv_params.dilation.h = params.dilation_height_factor;
+    dw_conv_params.dilation.w = params.dilation_width_factor;
+    assert(dw_conv_params.dilation.h == 1);
+    assert(dw_conv_params.dilation.w == 1);
+
+    dw_conv_params.input_offset = params.input_offset;
+    dw_conv_params.output_offset = params.output_offset;
+    dw_conv_params.stride.h = params.stride_height;
+    dw_conv_params.stride.w = params.stride_width;
+    dw_conv_params.padding.h = params.padding_values.height;
+    dw_conv_params.padding.w = params.padding_values.width;
+
+    dw_conv_params.activation.min = params.quantized_activation_min;
+    dw_conv_params.activation.max = params.quantized_activation_max;
+    dw_conv_params.ch_mult = params.depth_multiplier;
+
+    // Pass the caller's per-channel arrays. Pointing these at a single per-tensor scalar
+    // taken from params would give the kernel a one-element "array" to index per channel.
+    // The CMSIS-NN struct holds non-const pointers, hence the const_cast.
+    cmsis_nn_per_channel_quant_params quant_params;
+    quant_params.multiplier = const_cast<int32_t *>(output_multiplier);
+    quant_params.shift = const_cast<int32_t *>(output_shift);
+
+    assert(dw_conv_params.activation.min <= dw_conv_params.activation.max);
+    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+    if (bias_data)
+    {
+      assert(bias_shape.FlatSize() == output_depth);
+    }
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batch_size;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_shape.Dims(3);
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = filter_shape.Dims(0);
+    filter_dims.h = filter_shape.Dims(1);
+    filter_dims.w = filter_shape.Dims(2);
+    filter_dims.c = output_depth;
+
+    cmsis_nn_dims bias_dims;
+    bias_dims.n = 1;
+    bias_dims.h = 1;
+    bias_dims.w = 1;
+    bias_dims.c = output_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batch_size;
+    output_dims.h = output_shape.Dims(1);
+    output_dims.w = output_shape.Dims(2);
+    output_dims.c = output_depth;
+
+    // The scratchpad tensor is one-dimensional; its only dimension is used as the buffer size.
+    cmsis_nn_context ctx;
+    ctx.buf = scratchpad_data;
+    ctx.size = scratchpad_shape.Dims(0);
+
+    auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims,
+                                             input_data, &filter_dims, filter_data, &bias_dims,
+                                             bias_data, &output_dims, output_data);
+    assert(res == ARM_MATH_SUCCESS);
+    // res is only read by the assert above; keep NDEBUG builds free of unused warnings.
+    (void)res;
+  }
+  else
+  {
+    tflite::reference_integer_ops::DepthwiseConvPerChannel(
+      params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+      bias_shape, bias_data, output_shape, output_data);
+  }
+}
+
+// Sizes the DepthwiseConv2D scratchpad tensor for the CMSIS-NN int8 kernel.
+//
+// For S8 input with a 1x1 dilation the scratchpad is resized to the value reported by
+// arm_depthwise_conv_wrapper_s8_get_buffer_size multiplied by the element size of the input
+// data type. For every other configuration the scratchpad is marked non-allocatable.
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  // Value-initialised so that the fields not assigned here (offsets, stride, padding,
+  // ch_mult, activation) are not handed to CMSIS-NN with indeterminate values.
+  cmsis_nn_dw_conv_params dw_conv_params{};
+  dw_conv_params.dilation.h = params.dilation_height_factor;
+  dw_conv_params.dilation.w = params.dilation_width_factor;
+
+  const bool use_cmsis_nn = input_data_type == luci_interpreter::DataType::S8 &&
+                            dw_conv_params.dilation.h == 1 && dw_conv_params.dilation.w == 1;
+  if (!use_cmsis_nn)
+  {
+    scratchpad->set_allocatable(false);
+    return;
+  }
+
+  const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+  const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+
+  cmsis_nn_dims input_dims;
+  input_dims.n = batch_size;
+  input_dims.h = input_shape.Dims(1);
+  input_dims.w = input_shape.Dims(2);
+  input_dims.c = input_shape.Dims(3);
+
+  cmsis_nn_dims filter_dims;
+  filter_dims.n = filter_shape.Dims(0);
+  filter_dims.h = filter_shape.Dims(1);
+  filter_dims.w = filter_shape.Dims(2);
+  filter_dims.c = output_depth;
+
+  cmsis_nn_dims output_dims;
+  output_dims.n = batch_size;
+  output_dims.h = output_shape.Dims(1);
+  output_dims.w = output_shape.Dims(2);
+  output_dims.c = output_depth;
+
+  const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
+    &dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+  auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+  luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+  scratchpad->resize(scratchpad_shape);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+// Dequantizes input_data of element type T into float output_data by forwarding to
+// tflite::reference_integer_ops::Dequantize<T>.
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const T *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+                                               output_data);
+}
+
+// uint8_t overload: dequantizes by forwarding to tflite::reference_ops::Dequantize rather
+// than the integer_ops variant used by the generic template.
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FILL_H
+#define LUCI_INTERPRETER_PAL_FILL_H
+
+#include "PALreference_ops.h"
+
+#endif // LUCI_INTERPRETER_PAL_FILL_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+// Quantizes float input_data into output_data of element type T by forwarding to
+// tflite::reference_ops::AffineQuantize.
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+                            const tflite::RuntimeShape &input_shape, const float *input_data,
+                            const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+// Requantizes `size` elements from input_data into output_data.
+//
+// Thin PAL wrapper: every argument is forwarded unchanged, in the same order, to
+// tflite::reference_ops::Requantize.
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+                              int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+                              int32_t input_zero_point, int32_t output_zero_point,
+                              Output *output_data)
+{
+  tflite::reference_ops::Requantize(input_data, size,
+                                    effective_scale_multiplier, effective_scale_shift,
+                                    input_zero_point, output_zero_point,
+                                    output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+// Fully-quantized SVDF, executed by CMSIS-NN (arm_svdf_s8).
+//
+// scale_1_a / scale_1_b become the multiplier / shift of the "input" quantization parameters
+// and scale_2_a / scale_2_b those of the "output" quantization parameters handed to the
+// kernel. input_zp and output_zp are passed as the input and output offsets.
+//
+// scratchpad_data and output_temp_data are the two working buffers given to the kernel.
+// NOTE(review): the value returned by arm_svdf_s8 is discarded, and only the fields assigned
+// below are set in each cmsis_nn_dims / cmsis_nn_context structure.
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  cmsis_nn_dims input_dims;
+  input_dims.n = input_shape.Dims(0);
+  input_dims.h = input_shape.Dims(1);
+
+  cmsis_nn_dims weights_feature_dims;
+  weights_feature_dims.n = weight_feature_shape.Dims(0);
+  weights_feature_dims.h = weight_feature_shape.Dims(1);
+
+  cmsis_nn_dims weights_time_dims;
+  weights_time_dims.n = weight_time_shape.Dims(0);
+  weights_time_dims.h = weight_time_shape.Dims(1);
+
+  cmsis_nn_dims bias_dims;
+  bias_dims.n = bias_shape.Dims(0);
+
+  // The state is described to the kernel as batch_size rows of memory_size * num_filters.
+  cmsis_nn_dims state_dims;
+  state_dims.n = batch_size;
+  state_dims.h = memory_size * num_filters;
+
+  cmsis_nn_dims output_dims;
+  output_dims.n = output_shape.Dims(0);
+  output_dims.h = output_shape.Dims(1);
+
+  cmsis_nn_svdf_params svdf_params;
+  svdf_params.rank = params.rank;
+  svdf_params.input_offset = input_zp;
+  svdf_params.output_offset = output_zp;
+
+  // No extra clamping: the activation ranges span the full int16 / int8 value ranges.
+  svdf_params.input_activation.min = INT16_MIN;
+  svdf_params.input_activation.max = INT16_MAX;
+
+  svdf_params.output_activation.min = INT8_MIN;
+  svdf_params.output_activation.max = INT8_MAX;
+
+  cmsis_nn_per_tensor_quant_params in_quant_params;
+  in_quant_params.multiplier = scale_1_a;
+  in_quant_params.shift = scale_1_b;
+
+  cmsis_nn_per_tensor_quant_params out_quant_params;
+  out_quant_params.multiplier = scale_2_a;
+  out_quant_params.shift = scale_2_b;
+
+  cmsis_nn_context scratch_ctx;
+  scratch_ctx.buf = scratchpad_data;
+
+  cmsis_nn_context scratch_output_ctx;
+  scratch_output_ctx.buf = output_temp_data;
+
+  arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
+              &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims,
+              weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data,
+              &output_dims, output_data);
+}
+// Float SVDF.
+//
+// Steps, in order:
+//   1. shift the whole activation state buffer left by one element;
+//   2. compute the feature-weight dot products for every batch and filter and store each
+//      into the last slot of the corresponding memory_size-long state row;
+//   3. let tflite::reference_ops::ApplyTimeWeightsBiasAndActivation produce output_data from
+//      the state, the time weights, the bias and params.activation.
+//
+// num_units is derived as num_filters / rank, so params.rank must be non-zero.
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  const int32_t rank = params.rank;
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t input_size = input_shape.Dims(1);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t num_units = num_filters / rank;
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state.
+  // An index loop is used so that an empty state buffer simply performs no iterations.
+  {
+    const int32_t state_size = batch_size * num_filters * memory_size;
+    for (int32_t i = 1; i < state_size; ++i)
+    {
+      activation_state_data[i - 1] = activation_state_data[i];
+    }
+  }
+
+  // Note: no need to clear the latest activation, matmul is not accumulative.
+
+  // Compute conv1d(inputs, weights_feature).
+  // The activation_state's rightmost column is used to save current cycle
+  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+  // having the stride equal to memory_size.
+
+  // Perform batched matrix vector multiply operation:
+  {
+    // The write cursor keeps advancing across batches; it is not reset per batch.
+    float *result_in_batch = &activation_state_data[memory_size - 1];
+    for (int i = 0; i < batch_size; ++i)
+    {
+      // Every batch is multiplied by the same feature-weight matrix.
+      const float *matrix_ptr = weight_feature_data;
+      for (int j = 0; j < num_filters; ++j)
+      {
+        float dot_prod = 0.0f;
+        const float *vector_in_batch = input_data + i * input_size;
+        for (int k = 0; k < input_size; ++k)
+        {
+          dot_prod += *matrix_ptr++ * *vector_in_batch++;
+        }
+        *result_in_batch = dot_prod;
+        result_in_batch += memory_size;
+      }
+    }
+  }
+
+  tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+    batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+    params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+// Sizes the SVDF scratchpad tensors for the CMSIS-NN PAL.
+//
+// scratchpad_1 is always resized to {batch_size, num_filters}. scratchpad_2 is resized to
+// {batch_size, num_units} only for S8 input. The remaining scratchpads and the two shape
+// arguments are not used here. A float input combined with S8/U8 feature weights (hybrid)
+// trips an assertion.
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+  // Silence unused-parameter warnings.
+  (void)input_shape;
+  (void)weight_time_shape;
+  (void)scratchpad_3;
+  (void)scratchpad_4;
+  (void)scratchpad_5;
+  (void)scratchpad_6;
+
+  const bool float_input = (input_data_type == luci_interpreter::DataType::FLOAT32);
+  const bool quantized_weights = (weight_feature_data_type == luci_interpreter::DataType::S8 ||
+                                  weight_feature_data_type == luci_interpreter::DataType::U8);
+  if (float_input && quantized_weights)
+  {
+    assert(false && "Hybrid type is not supported for cmsisnn");
+  }
+
+  // Resize scratchpad_1 tensor
+  scratchpad_1->resize({batch_size, num_filters});
+
+  // Resize scratchpad_2 for full_integer op
+  const bool full_integer = (input_data_type == luci_interpreter::DataType::S8);
+  if (full_integer)
+  {
+    scratchpad_2->resize({batch_size, num_units});
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+#define LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+
+#include "arm_nnfunctions.h"
+#include "core/KernelParams.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
+#include "fixedpoint/fixedpoint.h"
+
+namespace luci_interpreter_pal
+{
+namespace lstm
+{
+
+// Builds a cmsis_nn_lstm_params structure from the interpreter's integer LSTM parameters.
+//
+// Scaling multipliers/shifts, clipping values, variance guards, zero points and the effective
+// bias pointers are copied from params_in. The gate bias and layer-norm coefficient pointers
+// are taken from the corresponding arguments. The activation range is set to the full int16_t
+// range.
+//
+// The returned structure stores raw pointers obtained from params_in's effective-bias
+// members via data(), so params_in must outlive every use of the result.
+inline cmsis_nn_lstm_params
+convert_lstm_params(const luci_interpreter::IntegerLSTMParams &params_in, bool time_major,
+                    int32_t output_zeropoint, const int32_t *input_gate_bias,
+                    const int32_t *forget_gate_bias, const int32_t *cell_gate_bias,
+                    const int32_t *output_gate_bias, int16_t *input_layer_norm_coefficients,
+                    int16_t *forget_layer_norm_coefficients, int16_t *cell_layer_norm_coefficients,
+                    int16_t *output_layer_norm_coefficients)
+{
+  cmsis_nn_lstm_params params_out;
+
+  params_out.time_major = time_major;
+
+  // Multipliers and shifts for weights
+  params_out.input_to_input_scaling.multiplier = params_in.effective_input_to_input_scale_a;
+  params_out.input_to_input_scaling.shift = params_in.effective_input_to_input_scale_b;
+  params_out.recurrent_to_input_scaling.multiplier = params_in.effective_recurrent_to_input_scale_a;
+  params_out.recurrent_to_input_scaling.shift = params_in.effective_recurrent_to_input_scale_b;
+  params_out.cell_to_input_scaling.multiplier = params_in.effective_cell_to_input_scale_a;
+  params_out.cell_to_input_scaling.shift = params_in.effective_cell_to_input_scale_b;
+  params_out.input_to_forget_scaling.multiplier = params_in.effective_input_to_forget_scale_a;
+  params_out.input_to_forget_scaling.shift = params_in.effective_input_to_forget_scale_b;
+  params_out.recurrent_to_forget_scaling.multiplier =
+    params_in.effective_recurrent_to_forget_scale_a;
+  params_out.recurrent_to_forget_scaling.shift = params_in.effective_recurrent_to_forget_scale_b;
+  params_out.cell_to_forget_scaling.multiplier = params_in.effective_cell_to_forget_scale_a;
+  params_out.cell_to_forget_scaling.shift = params_in.effective_cell_to_forget_scale_b;
+  params_out.input_to_cell_scaling.multiplier = params_in.effective_input_to_cell_scale_a;
+  params_out.input_to_cell_scaling.shift = params_in.effective_input_to_cell_scale_b;
+  params_out.recurrent_to_cell_scaling.multiplier = params_in.effective_recurrent_to_cell_scale_a;
+  params_out.recurrent_to_cell_scaling.shift = params_in.effective_recurrent_to_cell_scale_b;
+  params_out.input_to_output_scaling.multiplier = params_in.effective_input_to_output_scale_a;
+  params_out.input_to_output_scaling.shift = params_in.effective_input_to_output_scale_b;
+
+  params_out.recurrent_to_output_scaling.multiplier =
+    params_in.effective_recurrent_to_output_scale_a;
+  params_out.recurrent_to_output_scaling.shift = params_in.effective_recurrent_to_output_scale_b;
+  params_out.cell_to_output_scaling.multiplier = params_in.effective_cell_to_output_scale_a;
+  params_out.cell_to_output_scaling.shift = params_in.effective_cell_to_output_scale_b;
+  params_out.projection_scaling.multiplier = params_in.effective_proj_scale_a;
+  params_out.projection_scaling.shift = params_in.effective_proj_scale_b;
+
+  // Multipliers and shifts for layer normalization
+  params_out.layer_norm_input_scaling.multiplier = params_in.layer_norm_input_scale_a;
+  params_out.layer_norm_input_scaling.shift = params_in.layer_norm_input_scale_b;
+  params_out.layer_norm_forget_scaling.multiplier = params_in.layer_norm_forget_scale_a;
+  params_out.layer_norm_forget_scaling.shift = params_in.layer_norm_forget_scale_b;
+  params_out.layer_norm_cell_scaling.multiplier = params_in.layer_norm_cell_scale_a;
+  params_out.layer_norm_cell_scaling.shift = params_in.layer_norm_cell_scale_b;
+  params_out.layer_norm_output_scaling.multiplier = params_in.layer_norm_output_scale_a;
+  params_out.layer_norm_output_scaling.shift = params_in.layer_norm_output_scale_b;
+
+  params_out.clip.cell = params_in.quantized_cell_clip;
+  params_out.clip.projection = params_in.quantized_proj_clip;
+
+  params_out.cell_state_shift = params_in.cell_scale;
+
+  params_out.hidden_offset = params_in.hidden_zp;
+  params_out.output_state_offset = output_zeropoint;
+
+  params_out.guard.input_variance = params_in.input_variance_guard;
+  params_out.guard.forget_variance = params_in.forget_variance_guard;
+  params_out.guard.cell_variance = params_in.cell_variance_guard;
+  params_out.guard.output_variance = params_in.output_variance_guard;
+
+  // Effective biases: pointers into storage owned by params_in.
+  params_out.i2f_effective_bias = params_in.input_to_forget_effective_bias.data();
+  params_out.r2f_effective_bias = params_in.recurrent_to_forget_effective_bias.data();
+  params_out.i2c_effective_bias = params_in.input_to_cell_effective_bias.data();
+  params_out.r2c_effective_bias = params_in.recurrent_to_cell_effective_bias.data();
+  params_out.i2o_effective_bias = params_in.input_to_output_effective_bias.data();
+  params_out.r2o_effective_bias = params_in.recurrent_to_output_effective_bias.data();
+  params_out.i2i_effective_bias = params_in.input_to_input_effective_bias.data();
+  params_out.r2i_effective_bias = params_in.recurrent_to_input_effective_bias.data();
+  params_out.projection_effective_bias = params_in.projection_effective_bias.data();
+
+  params_out.hidden_scaling.multiplier = params_in.effective_hidden_scale_a;
+  params_out.hidden_scaling.shift = params_in.effective_hidden_scale_b;
+
+  params_out.input_gate_bias = input_gate_bias;
+  params_out.forget_gate_bias = forget_gate_bias;
+  params_out.cell_gate_bias = cell_gate_bias;
+  params_out.output_gate_bias = output_gate_bias;
+
+  params_out.layer_norm.input_weight = input_layer_norm_coefficients;
+  params_out.layer_norm.forget_weight = forget_layer_norm_coefficients;
+  params_out.layer_norm.cell_weight = cell_layer_norm_coefficients;
+  params_out.layer_norm.output_weight = output_layer_norm_coefficients;
+
+  params_out.activation.min = std::numeric_limits<int16_t>::min();
+  params_out.activation.max = std::numeric_limits<int16_t>::max();
+
+  return params_out;
+}
+
+} // namespace lstm
+
+void eval_integer_8x8_16_lstm(
+ const luci_interpreter::Tensor *input, const luci_interpreter::Tensor *input_to_input_weights,
+ const luci_interpreter::Tensor *input_to_forget_weights,
+ const luci_interpreter::Tensor *input_to_cell_weights,
+ const luci_interpreter::Tensor *input_to_output_weights,
+ const luci_interpreter::Tensor *recurrent_to_input_weights,
+ const luci_interpreter::Tensor *recurrent_to_forget_weights,
+ const luci_interpreter::Tensor *recurrent_to_cell_weights,
+ const luci_interpreter::Tensor *recurrent_to_output_weights,
+ const luci_interpreter::Tensor *cell_to_input_weights,
+ const luci_interpreter::Tensor *cell_to_forget_weights,
+ const luci_interpreter::Tensor *cell_to_output_weights,
+ const luci_interpreter::Tensor *input_layer_norm_coefficients,
+ const luci_interpreter::Tensor *forget_layer_norm_coefficients,
+ const luci_interpreter::Tensor *cell_layer_norm_coefficients,
+ const luci_interpreter::Tensor *output_layer_norm_coefficients,
+ const luci_interpreter::Tensor *input_gate_bias, const luci_interpreter::Tensor *forget_gate_bias,
+ const luci_interpreter::Tensor *cell_gate_bias, const luci_interpreter::Tensor *output_gate_bias,
+ const luci_interpreter::Tensor *projection_weights,
+ const luci_interpreter::Tensor *projection_bias,
+ const luci_interpreter::UnidirectionalSequenceLSTMParams ¶ms, bool forward_sequence,
+ bool time_major, const luci_interpreter::IntegerLSTMParams &integer_lstm_param,
+ int32_t output_state_zp, luci_interpreter::Tensor *output_state,
+ luci_interpreter::Tensor *cell_state, luci_interpreter::Tensor *output, int16_t *scratch0,
+ int16_t *scratch1, int16_t *scratch2, int16_t *scratch3, int8_t *scratch4, int32_t *scratch5)
+{
+ // CMSIS-NN does not support these configurations currently.
+ // Please use MCU kernels instead
+ const bool use_layer_norm = (forget_layer_norm_coefficients != nullptr);
+ const bool use_peephole = (cell_to_output_weights != nullptr);
+ const bool use_projection = (projection_weights != nullptr);
+ const bool use_cifg = (input_to_input_weights == nullptr);
+ const bool unsupported_config = use_layer_norm || use_peephole || use_projection || use_cifg;
+
+ if (unsupported_config)
+ {
+ assert(false && "CMSIS-NN does not support these configurations currently");
+ return;
+ }
+
+ const auto input_shape = input->shape();
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() >= 2 && input_shape.num_dims() <= 3);
+
+ cmsis_nn_lstm_context scratch_buffers;
+ scratch_buffers.input_gate = scratch0;
+ scratch_buffers.forget_gate = scratch1;
+ scratch_buffers.cell_gate = scratch2;
+ scratch_buffers.output_gate = scratch3;
+ scratch_buffers.scratch = scratch4;
+
+ cmsis_nn_lstm_params cmsis_lstm_params = lstm::convert_lstm_params(
+ integer_lstm_param, time_major, output_state_zp,
+ luci_interpreter::kernels::getTensorData<int32_t>(input_gate_bias),
+ luci_interpreter::kernels::getTensorData<int32_t>(forget_gate_bias),
+ luci_interpreter::kernels::getTensorData<int32_t>(cell_gate_bias),
+ luci_interpreter::kernels::getTensorData<int32_t>(output_gate_bias),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(input_layer_norm_coefficients)),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(forget_layer_norm_coefficients)),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(cell_layer_norm_coefficients)),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(output_layer_norm_coefficients)));
+
+ const int n_input = input_shape.dim(input_shape.num_dims() - 1);
+ int max_time, n_batch;
+ if (input_shape.num_dims() == 2)
+ {
+ max_time = 1;
+ n_batch = input_shape.dim(0);
+ }
+ else
+ {
+ max_time = (time_major) ? input_shape.dim(0) : input_shape.dim(1);
+ n_batch = (time_major) ? input_shape.dim(1) : input_shape.dim(0);
+ }
+
+ // n_cell and n_output will be the same size when there is no projection.
+ const int n_cell = input_to_output_weights->shape().dim(0);
+ const int n_output = recurrent_to_output_weights->shape().dim(1);
+
+ cmsis_nn_lstm_dims lstm_dims;
+ lstm_dims.num_inputs = n_input;
+ lstm_dims.num_outputs = n_output;
+ lstm_dims.num_batches = n_batch;
+ lstm_dims.max_time = max_time;
+
+ arm_lstm_unidirectional_s16_s8(
+ &scratch_buffers, const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input)),
+ &lstm_dims,
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_input_weights)),
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_forget_weights)),
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_cell_weights)),
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(input_to_output_weights)),
+ const_cast<int8_t *>(
+ luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_input_weights)),
+ const_cast<int8_t *>(
+ luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_forget_weights)),
+ const_cast<int8_t *>(
+ luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_cell_weights)),
+ const_cast<int8_t *>(
+ luci_interpreter::kernels::getTensorData<int8_t>(recurrent_to_output_weights)),
+ const_cast<int16_t *>(luci_interpreter::kernels::getTensorData<int16_t>(cell_to_input_weights)),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(cell_to_forget_weights)),
+ const_cast<int16_t *>(
+ luci_interpreter::kernels::getTensorData<int16_t>(cell_to_output_weights)),
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(projection_weights)),
+ &cmsis_lstm_params,
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(output_state)),
+ const_cast<int16_t *>(luci_interpreter::kernels::getTensorData<int16_t>(cell_state)),
+ const_cast<int8_t *>(luci_interpreter::kernels::getTensorData<int8_t>(output)));
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "fixedpoint/fixedpoint.h"
+#include "ruy/profiler/instrumentation.h" // from @ruy
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+#include "tensorflow/lite/kernels/internal/reference/add_n.h"
+#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "tensorflow/lite/kernels/internal/reference/cast.h"
+#include "tensorflow/lite/kernels/internal/reference/ceil.h"
+#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/div.h"
+#include "tensorflow/lite/kernels/internal/reference/elu.h"
+#include "tensorflow/lite/kernels/internal/reference/exp.h"
+#include "tensorflow/lite/kernels/internal/reference/fill.h"
+#include "tensorflow/lite/kernels/internal/reference/floor.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/gather.h"
+#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
+#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
+#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/neg.h"
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reduce.h"
+#include "tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
+#include "tensorflow/lite/kernels/internal/reference/round.h"
+#include "tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
+#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
+#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/sub.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
+#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite
+{
+
+namespace reference_ops
+{
+
+// Element-wise ReLU: every element that compares less than zero is replaced by
+// zero, every other element is copied through unchanged.
+//
+// The number of elements processed is MatchingFlatSize(input_shape, output_shape).
+template <typename T>
+inline void Relu(const RuntimeShape &input_shape, const T *input_data,
+                 const RuntimeShape &output_shape, T *output_data)
+{
+  const int num_elements = MatchingFlatSize(input_shape, output_shape);
+  const T zero = 0;
+  for (int idx = 0; idx < num_elements; ++idx)
+  {
+    const T x = input_data[idx];
+    if (x < zero)
+    {
+      output_data[idx] = zero;
+    }
+    else
+    {
+      output_data[idx] = x;
+    }
+  }
+}
+
+// Element-wise Relu1: limits every element to the closed interval [-1, 1].
+//
+// The number of elements processed is MatchingFlatSize(input_shape, output_shape).
+template <typename T>
+inline void Relu1(const RuntimeShape &input_shape, const T *input_data,
+                  const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Relu1 (not fused)");
+  const int num_elements = MatchingFlatSize(input_shape, output_shape);
+  const T hi = 1;
+  const T lo = -1;
+  for (int idx = 0; idx < num_elements; ++idx)
+  {
+    T x = input_data[idx];
+    // The upper bound is tested first, then the lower bound.
+    if (x > hi)
+    {
+      x = hi;
+    }
+    else if (x < lo)
+    {
+      x = lo;
+    }
+    output_data[idx] = x;
+  }
+}
+
+// Element-wise float Relu6: limits every element to the closed interval [0, 6].
+//
+// The number of elements processed is MatchingFlatSize(input_shape, output_shape).
+inline void Relu6(const RuntimeShape &input_shape, const float *input_data,
+                  const RuntimeShape &output_shape, float *output_data)
+{
+  ruy::profiler::ScopeLabel label("Relu6 (not fused)");
+  const int num_elements = MatchingFlatSize(input_shape, output_shape);
+  const float hi = 6;
+  const float lo = 0;
+  for (int idx = 0; idx < num_elements; ++idx)
+  {
+    float x = input_data[idx];
+    // The upper bound is tested first, then the lower bound.
+    if (x > hi)
+    {
+      x = hi;
+    }
+    else if (x < lo)
+    {
+      x = lo;
+    }
+    output_data[idx] = x;
+  }
+}
+
+// Quantized ReluX driven by ReluParams.
+//
+// Each input element is widened to 32 bits and re-scaled as
+//   params.output_offset +
+//     MultiplyByQuantizedMultiplier(value - params.input_offset,
+//                                   params.output_multiplier, params.output_shift)
+// The result is clamped to
+// [params.quantized_activation_min, params.quantized_activation_max] and then
+// narrowed back to T.
+//
+// The number of elements processed is MatchingFlatSize(input_shape, output_shape).
+template <typename T>
+inline void ReluX(const tflite::ReluParams &params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const int32 val = static_cast<int32_t>(input_data[i]);
+    int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
+                                                                         params.output_multiplier,
+                                                                         params.output_shift);
+    // Lower bound first, then upper bound.
+    clamped = std::max(params.quantized_activation_min, clamped);
+    clamped = std::min(params.quantized_activation_max, clamped);
+    output_data[i] = static_cast<T>(clamped);
+  }
+}
+
+// Quantized ReluX driven by ActivationParams.
+//
+// No re-scaling is performed: each element is simply limited to the interval
+// [params.quantized_activation_min, params.quantized_activation_max], with both
+// bounds converted to T once before the loop.
+//
+// The number of elements processed is MatchingFlatSize(input_shape, output_shape).
+template <typename T>
+inline void ReluX(const tflite::ActivationParams &params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  const T max_value = params.quantized_activation_max;
+  const T min_value = params.quantized_activation_min;
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const T val = input_data[i];
+    const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
+    output_data[i] = clamped;
+  }
+}
+
+// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+//
+// Broadcasting uint8 Mul expressed as a loop nest whose extents are
+// params.broadcast_shape[0..4].
+//
+// When unswitched_params.broadcast_category is kFirstInputBroadcastsFast the
+// inputs are used as given; otherwise the two inputs (and their input offsets)
+// are swapped before the loop nest runs. input1_shape, input2_shape and
+// output_shape are not read by this function.
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params,
+                                 const RuntimeShape &unswitched_input1_shape,
+                                 const uint8 *unswitched_input1_data,
+                                 const RuntimeShape &unswitched_input2_shape,
+                                 const uint8 *unswitched_input2_data,
+                                 const RuntimeShape &output_shape, uint8 *output_data)
+{
+  ArithmeticParams switched_params = unswitched_params;
+  switched_params.input1_offset = unswitched_params.input2_offset;
+  switched_params.input2_offset = unswitched_params.input1_offset;
+
+  const bool use_unswitched = unswitched_params.broadcast_category ==
+                              tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+  const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
+  const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+  const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+  // Loop nest: the second input restarts from input2_data_reset on every
+  // iteration of the i1 loop and advances by y4 per i3 iteration. The first
+  // input is held fixed across the i3 loop and advances by y4 after it. The
+  // innermost step is an elementwise Mul of y4-long sections of the arrays.
+  uint8 *output_data_ptr = output_data;
+  const uint8 *input1_data_ptr = input1_data;
+  const uint8 *input2_data_reset = input2_data;
+  int y0 = params.broadcast_shape[0];
+  int y1 = params.broadcast_shape[1];
+  int y2 = params.broadcast_shape[2];
+  int y3 = params.broadcast_shape[3];
+  int y4 = params.broadcast_shape[4];
+  for (int i0 = 0; i0 < y0; ++i0)
+  {
+    // Initialised so that the assignment after the i1 loop never reads an
+    // indeterminate pointer when y1 == 0.
+    const uint8 *input2_data_ptr = input2_data_reset;
+    for (int i1 = 0; i1 < y1; ++i1)
+    {
+      input2_data_ptr = input2_data_reset;
+      for (int i2 = 0; i2 < y2; ++i2)
+      {
+        for (int i3 = 0; i3 < y3; ++i3)
+        {
+          MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+          input2_data_ptr += y4;
+          output_data_ptr += y4;
+        }
+        input1_data_ptr += y4;
+      }
+    }
+    // The next i0 iteration continues where the second input stopped.
+    input2_data_reset = input2_data_ptr;
+  }
+}
+
+// Element-wise int16 multiplication.
+//
+// Both inputs are interpreted as gemmlowp fixed-point values with 0 integer
+// bits; the raw fixed-point product is stored without any further clamping.
+// `params` is accepted for interface uniformity but is not read here.
+//
+// The number of elements processed is
+// MatchingElementsSize(input1_shape, input2_shape, output_shape).
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                const int16 *input1_data, const RuntimeShape &input2_shape,
+                const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
+{
+  ruy::profiler::ScopeLabel label("Mul/Int16");
+
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++)
+  {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+    F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+    output_data[i] = unclamped_result.raw();
+  }
+}
+
+// Element-wise multiplication of two int16 inputs producing uint8 output.
+//
+// The inputs are multiplied as gemmlowp fixed-point values with 0 integer bits.
+// The raw product is rounding-divided by 2^8, clamped to
+// [quantized_activation_min - output_offset, quantized_activation_max - output_offset]
+// and finally shifted by output_offset.
+//
+// The number of elements processed is
+// MatchingElementsSize(input1_shape, input2_shape, output_shape).
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                const int16 *input1_data, const RuntimeShape &input2_shape,
+                const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
+{
+  ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
+  int32 output_offset = params.output_offset;
+  int32 output_activation_min = params.quantized_activation_min;
+  int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++)
+  {
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+    F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+    int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+    // Clamp in the offset-free domain, then re-apply the output offset.
+    int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
+    clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
+    output_data[i] = output_offset + clamped_result;
+  }
+}
+
+// Element-wise int16 subtraction (input1 - input2) in gemmlowp fixed point with
+// 0 integer bits.
+//
+// At most one of params.input1_shift / params.input2_shift may be non-zero and
+// both must be <= 0 (checked with TFLITE_DCHECK). The input whose shift is
+// non-zero is first rounding-divided by 2^(-shift); the other input is used
+// as is. The saturating difference is then clamped to
+// [params.quantized_activation_min, params.quantized_activation_max].
+//
+// The number of elements processed is
+// MatchingElementsSize(input1_shape, input2_shape, output_shape).
+inline void Sub16(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                  const int16_t *input1_data, const RuntimeShape &input2_shape,
+                  const int16_t *input2_data, const RuntimeShape &output_shape,
+                  int16_t *output_data)
+{
+  ruy::profiler::ScopeLabel label("Sub/Int16");
+  const int input1_shift = params.input1_shift;
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+  const int16 output_activation_min = params.quantized_activation_min;
+  const int16 output_activation_max = params.quantized_activation_max;
+
+  TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
+  TFLITE_DCHECK_LE(input1_shift, 0);
+  TFLITE_DCHECK_LE(params.input2_shift, 0);
+  const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data;
+  const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data;
+  const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift;
+
+  if (input1_shift == 0)
+  {
+    // input1 is used as is, input2 is rescaled: result = input1 - scaled(input2).
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+    for (int i = 0; i < flat_size; ++i)
+    {
+      F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+      F0 scaled_input =
+        F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+      F0 result = SaturatingSub(input_ready_scaled, scaled_input);
+      const int16 raw_output = result.raw();
+      const int16 clamped_output =
+        std::min(output_activation_max, std::max(output_activation_min, raw_output));
+      output_data[i] = clamped_output;
+    }
+  }
+  else
+  {
+    // input1 is rescaled, input2 is used as is: result = scaled(input1) - input2.
+    // F0 uses 0 integer bits, range [-1, 1].
+    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+    for (int i = 0; i < flat_size; ++i)
+    {
+      F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+      F0 scaled_input =
+        F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+      F0 result = SaturatingSub(scaled_input, input_ready_scaled);
+      const int16 raw_output = result.raw();
+      const int16 clamped_output =
+        std::min(output_activation_max, std::max(output_activation_min, raw_output));
+      output_data[i] = clamped_output;
+    }
+  }
+}
+
+// Packs params.inputs_count equally sized inputs along params.axis of the output.
+//
+// A negative axis is interpreted relative to the rank of output_shape, in the
+// same way Unpack treats its axis. With
+//   outer_size = product of output dims before the axis
+//   copy_size  = product of output dims after the axis
+// input i contributes outer_size contiguous chunks of copy_size elements; chunk
+// k is copied to output offset (k * inputs_count + i) * copy_size.
+//
+// Only the first entry of input_shapes is inspected, and only in a debug check.
+template <typename Scalar>
+void Pack(const PackParams &params, const RuntimeShape *const *input_shapes,
+          const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("Pack");
+  const int dimensions = output_shape.DimensionsCount();
+  const int inputs_count = params.inputs_count;
+
+  int axis = params.axis;
+  if (axis < 0)
+  {
+    axis += dimensions;
+  }
+  TFLITE_DCHECK_GE(axis, 0);
+  TFLITE_DCHECK_LT(axis, dimensions);
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; i++)
+  {
+    outer_size *= output_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; i++)
+  {
+    copy_size *= output_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+  for (int i = 0; i < inputs_count; ++i)
+  {
+    for (int k = 0; k < outer_size; k++)
+    {
+      const Scalar *input_ptr = input_data[i] + copy_size * k;
+      int loc = k * inputs_count * copy_size + i * copy_size;
+      memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
+    }
+  }
+}
+
+// Splits the input along params.axis into params.num_split outputs.
+//
+// A negative axis is interpreted relative to the rank of input_shape. With
+//   outer_size = product of input dims before the axis
+//   copy_size  = product of input dims after the axis
+// output i receives outer_size contiguous chunks of copy_size elements; chunk k
+// is read from input offset (k * num_split + i) * copy_size.
+//
+// output_shape is only used in a debug check of the per-output element count.
+template <typename Scalar>
+void Unpack(const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+            const RuntimeShape &output_shape, Scalar *const *output_datas)
+{
+  ruy::profiler::ScopeLabel label("Unpack");
+  const int dimensions = input_shape.DimensionsCount();
+  const int outputs_count = params.num_split;
+
+  int outer_size = 1;
+  int axis = params.axis;
+  if (axis < 0)
+  {
+    axis += dimensions;
+  }
+  TFLITE_DCHECK_GE(axis, 0);
+  TFLITE_DCHECK_LT(axis, dimensions);
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; ++i)
+  {
+    copy_size *= input_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
+
+  for (int i = 0; i < outputs_count; ++i)
+  {
+    for (int k = 0; k < outer_size; k++)
+    {
+      Scalar *output_ptr = output_datas[i] + copy_size * k;
+      int loc = k * outputs_count * copy_size + i * copy_size;
+      memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+    }
+  }
+}
+
+// Packs uint8 inputs along params.axis, requantizing inputs whose quantization
+// differs from the output.
+//
+// For each outer index k and each input i, one chunk of copy_size elements is
+// written to the output:
+//  - if input i has the same zero point and scale as the output, the chunk is
+//    copied verbatim;
+//  - otherwise the branch asserts (it is treated as unsupported in debug
+//    builds) and, when assertions are disabled, each element is mapped through
+//    round(x * scale + bias) + output_zeropoint and saturated to [0, 255].
+//
+// Only the first entry of input_shapes is inspected, and only in a debug check.
+template <typename Scalar>
+void PackWithScaling(const PackParams &params, const RuntimeShape *const *input_shapes,
+                     const uint8 *const *input_data, const RuntimeShape &output_shape,
+                     uint8 *output_data)
+{
+  ruy::profiler::ScopeLabel label("PackWithScaling");
+  const int dimensions = output_shape.DimensionsCount();
+  int axis = params.axis;
+  const int32 *input_zeropoint = params.input_zeropoint;
+  const float *input_scale = params.input_scale;
+  int inputs_count = params.inputs_count;
+  const int32 output_zeropoint = params.output_zeropoint;
+  const float output_scale = params.output_scale;
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; i++)
+  {
+    outer_size *= output_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; i++)
+  {
+    copy_size *= output_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+  Scalar *output_ptr = output_data;
+  const float inverse_output_scale = 1.f / output_scale;
+  for (int k = 0; k < outer_size; k++)
+  {
+    for (int i = 0; i < inputs_count; ++i)
+    {
+      if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
+      {
+        memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
+      }
+      else
+      {
+        assert(false);
+        const float scale = input_scale[i] * inverse_output_scale;
+        const float bias = -input_zeropoint[i] * scale;
+        // Read the k-th chunk of input i, matching the memcpy branch above.
+        const uint8 *input_ptr = input_data[i] + k * copy_size;
+        for (int j = 0; j < copy_size; ++j)
+        {
+          const int value =
+            static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+          output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+        }
+      }
+      output_ptr += copy_size;
+    }
+  }
+}
+
+// Concatenation with the axis forced to 3.
+//
+// The caller's params are copied, the copy's axis is overwritten with 3 and the
+// work is delegated to Concatenation; all other fields of params are forwarded
+// unchanged.
+template <typename Scalar>
+void DepthConcatenation(const ConcatenationParams &params, const RuntimeShape *const *input_shapes,
+                        const Scalar *const *input_data, const RuntimeShape &output_shape,
+                        Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("DepthConcatenation");
+  auto params_copy = params;
+  params_copy.axis = 3;
+  Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
+}
+
+// Float LSTM cell.
+//
+// Steps performed:
+//  1. All activation/state/temp shapes are extended to 4 dimensions.
+//  2. input_data and prev_activ_data are concatenated along axis 3 into
+//     concat_temp_data.
+//  3. A fully-connected layer (weights_data, bias_data, no activation clamp)
+//     maps concat_temp_data to activ_temp_data, whose last dimension holds four
+//     consecutive groups of output_depth values:
+//     [input gate | new input | forget gate | output gate].
+//  4. For every (batch, height, width, channel):
+//       new_state    = sigmoid(input gate) * tanh(new input)
+//                      + sigmoid(forget gate) * prev_state
+//       output_activ = sigmoid(output gate) * tanh(new_state)
+//     new_state is written to output_state_data.
+//
+// `params` is accepted for interface uniformity but is not read here.
+inline void LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+                     const float *input_data, const RuntimeShape &unextended_prev_activ_shape,
+                     const float *prev_activ_data, const RuntimeShape &weights_shape,
+                     const float *weights_data, const RuntimeShape &unextended_bias_shape,
+                     const float *bias_data, const RuntimeShape &unextended_prev_state_shape,
+                     const float *prev_state_data,
+                     const RuntimeShape &unextended_output_state_shape, float *output_state_data,
+                     const RuntimeShape &unextended_output_activ_shape, float *output_activ_data,
+                     const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data,
+                     const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
+{
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  // Gather dimensions information, and perform consistency checks.
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+                                  output_state_shape, 0, output_activ_shape, 0);
+  const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+                                 output_state_shape, 1, output_activ_shape, 1);
+  const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+                                output_state_shape, 2, output_activ_shape, 2);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                                       3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+
+  // Concatenate prev_activ and input data together. Fixed-size arrays are used
+  // for the two inputs so that no heap allocation (and no <vector>) is needed.
+  constexpr int kConcatInputs = 2;
+  float const *concat_input_arrays_data[kConcatInputs] = {input_data, prev_activ_data};
+  RuntimeShape const *concat_input_arrays_shapes[kConcatInputs] = {&input_shape,
+                                                                   &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = kConcatInputs;
+  Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]),
+                concat_temp_shape, concat_temp_data);
+
+  // Fully connected, with the activation range left unbounded.
+  tflite::FullyConnectedParams fc_params;
+  fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+  fc_params.float_activation_max = std::numeric_limits<float>::max();
+  FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
+                 bias_shape, bias_data, activ_temp_shape, activ_temp_data);
+
+  // Memory state update (the LSTM "guts")
+  for (int b = 0; b < batches; ++b)
+  {
+    for (int w = 0; w < width; ++w)
+    {
+      for (int h = 0; h < height; ++h)
+      {
+        for (int c = 0; c < output_depth; ++c)
+        {
+          // The four gate pre-activations live in consecutive output_depth-wide
+          // groups of the last dimension of activ_temp.
+          const float input_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
+          const float new_input =
+            std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
+          const float forget_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
+          const float output_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
+          const float new_state =
+            input_gate * new_input +
+            forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+          output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+          output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
+            output_gate * std::tanh(new_state);
+        }
+      }
+    }
+  }
+}
+
+// Quantized LSTM cell implementation.
+// The quantization of the input, output arrays is as follows:
+// - The input activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that it is the natural interval for output
+// activations (see next point) and these need to be concatenated together.
+// We could accommodate different ranges by re-scaling, but we empirically
+// found that setting the input activations range to be [-1, 127/128] in the
+// first place, removing the need for re-scaling, greatly improves accuracy.
+// - The output activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that the definition of a LSTM cell makes them
+// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
+// makes for simpler, more accurate fixed-point arithmetic.
+// - The output-at-previous-timestep state array is obviously quantized as
+// the output activations.
+// - The internal LSTM memory (not the output-at-previous-timestep, the other
+// internal state array) is int16-quantized and may use any power-of-two,
+// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
+// StateIntegerBits below, see the below discussion of that template
+// parameter ("The StateIntegerBits template parameter").
+// - The output of the internal fully-connected node is int16-quantized
+// on the interval [-8, 8 * 32767/32768], the rationale for which is
+// explained just below ("Why [-8, 8] for fully-connected output?").
+//
+//
+// === The StateIntegerBits template parameter ===
+//
+// The StateIntegerBits template parameter controls the fixed-point format used
+// to represent the internal memory of the LSTM cell (not the
+// output-at-previous-timestep, the other internal state array). It's currently
+// a template parameter so that the model can control that. The most typical
+// value for StateIntegerBits is 4. Other plausible values are anywhere between
+// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
+// and drop that template parameter. The reason why it can't be a runtime
+// parameter is that this controls the fixed-point format used, i.e. we need to
+// generate actually different code based on it. In particular, we generate code
+// for a fixed-point tanh() implementation for that format, which internally
+// uses a fixed-point exp() implementation, which internally uses a
+// barrel-shifter with a number of steps that depends on StateIntegerBits.
+// Another consequence of that is that a higher value of StateIntegerBits
+// results in a more expensive implementation (more barrel shifter steps
+// needed).
+//
+//
+// === Why [-8, 8] for fully-connected output? ===
+//
+// This array is only fed to Logistic and Tanh functions, for which
+// the quantized implementation will want to use fixed-point arithmetic,
+// requiring a power-of-two representation interval. Thus, we should right
+// away quantize this array to a power-of-two interval; otherwise,
+// implementation will need to rescale that, losing any benefit that a tighter
+// representation interval might otherwise yield, while introducing some
+// numerical error and computational overhead.
+//
+// Now, Logistic and Tanh
+// are nearly constant (nearly equal to their horizontal asymptotes)
+// outside of a small bounded interval around 0:
+//
+// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4
+// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7
+// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14
+//
+// From this, we see that clamping to [-4, 4] would be too inaccurate
+// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
+// while clamping to [-16, 16] would make no difference even in float32.
+// However, for a fixed-point implementation in 16-bit integers, using 5
+// integer bits to represent the [-16, 16] range would leave only 11
+// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
+// representable values. Notice that is higher than the
+// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
+// Using [-8, 8] thus seems like the better compromise overall, enjoying
+// an increment of 2.4e-4 between representable values and a worst-case
+// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
+// [-16, 16].
+//
+// Moreover, all other things being equal, it is nice to choose the narrower
+// representation range, as that makes the implementation of fixed-point
+// math functions a little cheaper (each integer bit requires an additional
+// barrel-shifter step in the implementation of exp(-x)). That is further
+// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
+// sense for 32-bit float or 32-bit fixed-point quantization, but we are
+// aiming for 16-bit fixed-point quantization of these internal nodes here.
+//
+// Quantized LSTM cell, reference implementation.
+//
+// StateIntegerBits is the number of integer bits of the 16-bit fixed-point
+// internal state. The function:
+//   1. depth-concatenates `input` and `prev_activ` into `concat_temp`,
+//   2. runs the fully-connected node on that buffer, writing int16 values
+//      (3 integer bits) into `activ_temp`,
+//   3. evaluates the four gates per output channel and stores the new state
+//      in `output_state` (int16) and the new activations in `output_activ`
+//      (uint8).
+// `params` supplies weights_zero_point, accum_multiplier and accum_shift.
+// `gemmlowp_context` is ignored by this implementation.
+template <int StateIntegerBits>
+inline void
+LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+         const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape,
+         const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape,
+         const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape,
+         const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape,
+         const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape,
+         int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape,
+         uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape,
+         uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape,
+         int16 *activ_temp_data_int16, void *gemmlowp_context)
+{
+  (void)gemmlowp_context; // only used in optimized code.
+  int32 weights_zero_point = params.weights_zero_point;
+  int32 accum_multiplier = params.accum_multiplier;
+  int accum_shift = params.accum_shift;
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  // Gather dimensions information, and perform consistency checks.
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
+                                                 output_state_shape, output_activ_shape);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                                       3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
+  const int fc_output_depth =
+    MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
+  const int fc_accum_depth = total_input_depth;
+  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
+
+  // Depth-concatenate prev_activ and input data together.
+  uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
+  const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = 2;
+  Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
+                concat_temp_shape, concat_temp_data_uint8);
+
+  // Implementation of the fully connected node inside the LSTM cell.
+  // The operands are 8-bit integers, the accumulators are internally 32bit
+  // integers, and the output is 16-bit fixed-point with 3 integer bits so
+  // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
+  // is explained in the function comment above.
+  for (int b = 0; b < fc_batches; ++b)
+  {
+    for (int out_c = 0; out_c < fc_output_depth; ++out_c)
+    {
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32 accum = bias_data_int32[out_c];
+      // Accumulation loop.
+      for (int d = 0; d < fc_accum_depth; ++d)
+      {
+        int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
+        int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
+        accum += input_val * weights_val;
+      }
+      // Down-scale the final int32 accumulator to the scale used by our
+      // (16-bit, using 3 integer bits) fixed-point format. The quantized
+      // multiplier and shift here have been pre-computed offline
+      // (e.g. by toco).
+      accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
+      // Saturate, cast to int16, and store to the temporary activations array.
+      accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
+      activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
+    }
+  }
+
+  // Rest of the LSTM cell: tanh and logistic math functions, and some adds
+  // and muls, all done in 16-bit fixed-point.
+  for (int b = 0; b < outer_size; ++b)
+  {
+    for (int c = 0; c < output_depth; ++c)
+    {
+      // Define the fixed-point data types that we will use here. All use
+      // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
+      // They only differ by the number of integral vs. fractional bits,
+      // determining the range of values that they can represent.
+      //
+      // F0 uses 0 integer bits, range [-1, 1].
+      // This is the return type of math functions such as tanh, logistic,
+      // whose range is in [-1, 1].
+      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+      // F3 uses 3 integer bits, range [-8, 8].
+      // This is the range of the previous fully-connected node's output,
+      // which is our input here.
+      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
+      // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
+      // 2^StateIntegerBits]. It's used to represent the internal state, whose
+      // number of integer bits is currently dictated by the model. See comment
+      // on the StateIntegerBits template parameter above.
+      using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
+      // Implementation of input gate, using fixed-point logistic function.
+      F3 input_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
+      F0 input_gate_output = gemmlowp::logistic(input_gate_input);
+      // Implementation of input modulation gate, using fixed-point tanh
+      // function.
+      F3 input_modulation_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
+      F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
+      // Implementation of forget gate, using fixed-point logistic function.
+      F3 forget_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
+      F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
+      // Implementation of output gate, using fixed-point logistic function.
+      F3 output_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
+      F0 output_gate_output = gemmlowp::logistic(output_gate_input);
+      // Implementation of internal multiplication nodes, still in fixed-point.
+      F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
+      FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
+      FS prev_state_times_forget_state = forget_gate_output * prev_state;
+      // Implementation of internal addition node, saturating.
+      FS new_state =
+        gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
+                                prev_state_times_forget_state);
+      // Implementation of last internal Tanh node, still in fixed-point.
+      // Since a Tanh fixed-point implementation is specialized for a given
+      // number of integer bits, and each specialization can have a substantial
+      // code size, and we already used above a Tanh on an input with 3 integer
+      // bits, and per the table in the above function comment there is no
+      // significant accuracy to be lost by clamping to [-8, +8] for a
+      // 3-integer-bits representation, let us just do that. This helps people
+      // porting this to targets where code footprint must be minimized.
+      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
+      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
+      // Store the new internal state back to memory, as 16-bit integers.
+      // Note: here we store the original value with StateIntegerBits, not
+      // the rescaled 3-integer-bits value fed to tanh.
+      output_state_data_int16[b * output_depth + c] = new_state.raw();
+      // Down-scale the output activations to 8-bit integers, saturating,
+      // and store back to memory.
+      int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
+      int16 clamped_output_activ =
+        std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
+      output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
+    }
+  }
+}
+
+// Splits `input_data` along one axis into `params.num_split` output buffers.
+//
+// A negative `params.axis` is counted from the last dimension. Every output
+// shape must have the same rank as the input and match it on all dimensions
+// except the split axis; the output extents along that axis must add up to
+// the input extent. The input is walked once, front to back, copying one
+// contiguous chunk per (outer index, output) pair.
+template <typename Scalar>
+void Split(const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+           const RuntimeShape *const *output_shapes, Scalar *const *output_data)
+{
+  ruy::profiler::ScopeLabel label("Split");
+  const int split_dimensions = input_shape.DimensionsCount();
+  int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+  int outputs_count = params.num_split;
+  // After normalization the axis must address a real dimension.
+  TFLITE_DCHECK_GE(axis, 0);
+  TFLITE_DCHECK_LT(axis, split_dimensions);
+
+  int64_t split_size = 0;
+  for (int i = 0; i < outputs_count; i++)
+  {
+    TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+    for (int j = 0; j < split_dimensions; j++)
+    {
+      if (j != axis)
+      {
+        MatchingDim(*output_shapes[i], j, input_shape, j);
+      }
+    }
+    split_size += output_shapes[i]->Dims(axis);
+  }
+  TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
+  int64_t outer_size = 1;
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  // For all output arrays,
+  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+  int64_t base_inner_size = 1;
+  for (int i = axis + 1; i < split_dimensions; ++i)
+  {
+    base_inner_size *= input_shape.Dims(i);
+  }
+
+  const Scalar *input_ptr = input_data;
+  for (int64_t k = 0; k < outer_size; k++)
+  {
+    for (int i = 0; i < outputs_count; ++i)
+    {
+      // Kept in 64 bits: the product and the k * copy_size offset below are
+      // computed from int64_t operands and must not be narrowed to int.
+      const int64_t copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
+      memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+      input_ptr += copy_size;
+    }
+  }
+}
+
+// Row-major flat index of element (b, h, w) in an array whose two trailing
+// extents are `height` and `width`.
+inline int NodeOffset(int b, int h, int w, int height, int width)
+{
+  const int row = b * height + h;
+  return row * width + w;
+}
+
+// Local response normalization over the trailing dimension (float).
+//
+// For every position and every channel c, the squares of the inputs in the
+// half-open channel window [c - range, c + range), clipped to [0, depth),
+// are summed, and the input is scaled by (bias + alpha * sum) ^ (-beta).
+inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+                                       const RuntimeShape &input_shape, const float *input_data,
+                                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  for (int pos = 0; pos < outer_size; ++pos)
+  {
+    const int row_base = pos * depth;
+    for (int c = 0; c < depth; ++c)
+    {
+      // Channel window, clipped to the valid channel range.
+      const int window_begin = std::max(0, static_cast<int>(c - op_params.range));
+      const int window_end = std::min(depth, static_cast<int>(c + op_params.range));
+
+      float sum_of_squares = 0.f;
+      int neighbour = window_begin;
+      while (neighbour < window_end)
+      {
+        const float v = input_data[row_base + neighbour];
+        sum_of_squares += v * v;
+        ++neighbour;
+      }
+
+      const float multiplier =
+        std::pow(op_params.bias + op_params.alpha * sum_of_squares, -op_params.beta);
+      output_data[row_base + c] = input_data[row_base + c] * multiplier;
+    }
+  }
+}
+
+// Converts a half-precision buffer to float, element by element. Input and
+// output shapes must have the same flat size.
+inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data,
+                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  const Eigen::half *src = input_data;
+  float *dst = output_data;
+  for (int remaining = flat_size; remaining > 0; --remaining)
+  {
+    *dst++ = static_cast<float>(*src++);
+  }
+}
+
+// Fake-quantizes a float buffer: the [min, max] range from `op_params` is
+// nudged via NudgeQuantizationRange for a `num_bits`-wide integer grid
+// starting at 0, and the data is passed through FakeQuantizeArray.
+inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape,
+                      const float *input_data, const RuntimeShape &output_shape, float *output_data)
+{
+  ruy::profiler::ScopeLabel label("FakeQuant");
+  float range_min = op_params.minmax.min;
+  float range_max = op_params.minmax.max;
+  int bit_width = op_params.num_bits;
+
+  // The requested range has to contain 0 so that 0 stays representable,
+  // and it has to be non-empty.
+  TFLITE_DCHECK_LE(range_min, 0.0f);
+  TFLITE_DCHECK_GE(range_max, 0.0f);
+  TFLITE_DCHECK_LT(range_min, range_max);
+
+  // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor.
+  int lowest_level = 0;
+  int highest_level = (1 << bit_width) - 1;
+  float nudged_min;
+  float nudged_max;
+  float nudged_scale;
+  NudgeQuantizationRange(range_min, range_max, lowest_level, highest_level, &nudged_min,
+                         &nudged_max, &nudged_scale);
+
+  const int element_count = MatchingFlatSize(input_shape, output_shape);
+  FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, element_count);
+}
+
+// Values shared by `GatherNd` and `GatherNdString`, as computed by
+// `GatherNdHelper`.
+struct GatherNdHelperResult
+{
+ // Product of all indices dimensions except the last one.
+ int n_slices;
+ // Product of the params dimensions from `indices_nd` onwards.
+ int slice_size;
+ // Extent of the last indices dimension.
+ int indices_nd;
+ // Per leading params dimension: number of params elements spanned by one
+ // step along that dimension (flat size divided by the extents so far).
+ std::vector<int> dims_to_count;
+};
+
+// Computes the values used by both `GatherNd` and `GatherNdString`:
+//   - n_slices:      product of all indices dimensions except the last,
+//   - indices_nd:    extent of the last indices dimension,
+//   - slice_size:    product of params dimensions [indices_nd, rank),
+//   - dims_to_count: for each of the first indices_nd params dimensions, the
+//                    number of elements one step along that dimension spans.
+inline GatherNdHelperResult GatherNdHelper(const RuntimeShape &params_shape,
+                                           const RuntimeShape &indices_shape)
+{
+  GatherNdHelperResult ret;
+  ret.n_slices = 1;
+  ret.slice_size = 1;
+  const int indices_dims = indices_shape.DimensionsCount();
+  ret.indices_nd = indices_shape.Dims(indices_dims - 1);
+  const int params_dims = params_shape.DimensionsCount();
+  for (int i = 0; i < indices_dims - 1; ++i)
+  {
+    ret.n_slices *= indices_shape.Dims(i);
+  }
+  for (int i = ret.indices_nd; i < params_dims; ++i)
+  {
+    ret.slice_size *= params_shape.Dims(i);
+  }
+
+  // Peel one params dimension at a time off the flat size to get the strides.
+  int remain_flat_size = params_shape.FlatSize();
+  ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
+  for (int i = 0; i < ret.indices_nd; ++i)
+  {
+    ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
+    remain_flat_size = ret.dims_to_count[i];
+  }
+
+  return ret;
+}
+
+// Gathers slices of `params_data` addressed by N-dimensional indices.
+//
+// Each group of `indices_nd` consecutive entries in `indices_data` selects one
+// slice of `slice_size` elements, which is copied to the next slot of
+// `output_data`. `output_shape` is not read.
+template <typename ParamsT, typename IndicesT = int32>
+inline void GatherNd(const RuntimeShape &params_shape, const ParamsT *params_data,
+                     const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                     const RuntimeShape &output_shape, ParamsT *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNd");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      const IndicesT idx = indices_data[i * res.indices_nd + j];
+      // Debug-only guard against reading outside `params_data`.
+      TFLITE_DCHECK(0 <= idx && idx < params_shape.Dims(j));
+      from_pos += idx * res.dims_to_count[j];
+    }
+    std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
+                sizeof(ParamsT) * res.slice_size);
+  }
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+// String-tensor variant of `GatherNd`.
+//
+// For every index tuple the `slice_size` strings starting at the addressed
+// position are appended to a DynamicBuffer, which is finally written to
+// `output_data`. `output_shape` is not read.
+template <typename IndicesT = int32>
+inline void GatherNdString(const RuntimeShape &params_shape, const TfLiteTensor *params_data,
+                           const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                           const RuntimeShape &output_shape, TfLiteTensor *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNdString");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  DynamicBuffer buffer;
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      const IndicesT idx = indices_data[i * res.indices_nd + j];
+      // Debug-only guard against addressing a string outside `params_data`.
+      TFLITE_DCHECK(0 <= idx && idx < params_shape.Dims(j));
+      from_pos += idx * res.dims_to_count[j];
+    }
+    for (int j = 0; j < res.slice_size; ++j)
+    {
+      buffer.AddString(GetString(params_data, from_pos + j));
+    }
+  }
+  buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
+}
+#endif
+
+// Scatters `updates_data` into a zero-initialized `output_data`.
+//
+// Each group of `indices_nd` consecutive entries in `indices_data` addresses
+// the start of one output slice; the matching slice of updates is added to
+// it, so slices that land on the same position accumulate.
+template <typename IndicesT, typename UpdatesT>
+inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                      const RuntimeShape &updates_shape, const UpdatesT *updates_data,
+                      const RuntimeShape &output_shape, UpdatesT *output_data)
+{
+  ruy::profiler::ScopeLabel label("ScatterNd");
+
+  const int last_indices_dim = indices_shape.DimensionsCount() - 1;
+  const int index_depth = indices_shape.Dims(last_indices_dim);
+  const int updates_rank = updates_shape.DimensionsCount();
+
+  // Number of index tuples.
+  int slice_count = 1;
+  for (int d = 0; d < last_indices_dim; ++d)
+  {
+    slice_count *= indices_shape.Dims(d);
+  }
+
+  // Number of elements written per index tuple.
+  int elems_per_slice = 1;
+  for (int d = last_indices_dim; d < updates_rank; ++d)
+  {
+    elems_per_slice *= updates_shape.Dims(d);
+  }
+
+  // Element stride of each addressed output dimension.
+  const int total_output_elems = output_shape.FlatSize();
+  std::vector<int> strides(index_depth, 0);
+  int remaining = total_output_elems;
+  for (int d = 0; d < index_depth; ++d)
+  {
+    remaining = remaining / output_shape.Dims(d);
+    strides[d] = remaining;
+  }
+
+  memset(output_data, 0, sizeof(UpdatesT) * total_output_elems);
+
+  for (int s = 0; s < slice_count; ++s)
+  {
+    int dst_offset = 0;
+    for (int d = 0; d < index_depth; ++d)
+    {
+      IndicesT coord = indices_data[s * index_depth + d];
+      TFLITE_DCHECK(0 <= coord && coord < output_shape.Dims(d));
+      dst_offset += coord * strides[d];
+    }
+
+    const int src_offset = s * elems_per_slice;
+    for (int e = 0; e < elems_per_slice; e++)
+    {
+      output_data[dst_offset + e] += updates_data[src_offset + e];
+    }
+  }
+}
+
+// Writes the selected sub-block of a tensor through `writer`.
+//
+// The input shape is extended to rank 5 and the begin/size vectors of
+// `op_params` are front-padded to match. A missing begin entry means 0; a
+// missing size entry or a size of -1 means "up to the end of that dimension".
+// Elements are visited in row-major order and `writer->Write()` is called
+// with the flat input offset of each one. `output_shape` is not read.
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer)
+{
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
+  TFLITE_DCHECK_LE(op_params.begin_count, 5);
+  TFLITE_DCHECK_LE(op_params.size_count, 5);
+
+  // Per-axis [first, last) bounds after front-padding.
+  std::array<int, 5> first;
+  std::array<int, 5> last;
+  for (int axis = 0; axis < 5; ++axis)
+  {
+    const int padded = 5 - axis;
+    const int begin_pos = op_params.begin_count - padded;
+    const int size_pos = op_params.size_count - padded;
+
+    if (begin_pos < 0)
+    {
+      first[axis] = 0;
+    }
+    else
+    {
+      first[axis] = op_params.begin[begin_pos];
+    }
+
+    if (size_pos < 0 || op_params.size[size_pos] == -1)
+    {
+      last[axis] = ext_shape.Dims(axis);
+    }
+    else
+    {
+      last[axis] = first[axis] + op_params.size[size_pos];
+    }
+  }
+
+  for (int a = first[0]; a < last[0]; ++a)
+    for (int b = first[1]; b < last[1]; ++b)
+      for (int c = first[2]; c < last[2]; ++c)
+        for (int d = first[3]; d < last[3]; ++d)
+          for (int e = first[4]; e < last[4]; ++e)
+          {
+            writer->Write(Offset(ext_shape, a, b, c, d, e));
+          }
+}
+
+// Raw-buffer overload: wraps the two data pointers in a
+// SequentialTensorWriter and forwards to the writer-based Slice.
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  SequentialTensorWriter<T> sequential_writer(input_data, output_data);
+  Slice(op_params, input_shape, output_shape, &sequential_writer);
+}
+
+// TfLiteTensor overload: wraps the two tensors in a SequentialTensorWriter
+// and forwards to the writer-based Slice.
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
+{
+  SequentialTensorWriter<T> sequential_writer(input, output);
+  Slice(op_params, input_shape, output_shape, &sequential_writer);
+}
+
+// Element-wise minimum of `input1_data` against the single value
+// `input2_data[0]`.
+template <typename T>
+void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
+  const int element_count = MatchingFlatSize(input1_shape, output_shape);
+
+  const T upper_limit = input2_data[0];
+  for (int idx = 0; idx < element_count; idx++)
+  {
+    const T current = input1_data[idx];
+    if (current > upper_limit)
+    {
+      output_data[idx] = upper_limit;
+    }
+    else
+    {
+      output_data[idx] = current;
+    }
+  }
+}
+
+// Overload with the same argument layout as other binary ops, so that
+// generated code can call it uniformly. The shape of the second input is
+// accepted and ignored.
+template <typename T>
+inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// Element-wise maximum of `input1_data` against the single value
+// `input2_data[0]`.
+template <typename T>
+void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
+  const int element_count = MatchingFlatSize(input1_shape, output_shape);
+
+  const T lower_limit = input2_data[0];
+  for (int idx = 0; idx < element_count; idx++)
+  {
+    const T current = input1_data[idx];
+    if (current < lower_limit)
+    {
+      output_data[idx] = lower_limit;
+    }
+    else
+    {
+      output_data[idx] = current;
+    }
+  }
+}
+
+// Overload with the same argument layout as other binary ops, so that
+// generated code can call it uniformly. The shape of the second input is
+// accepted and ignored.
+template <typename T>
+inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// ArgMax: delegates to ArgMinMax with a std::greater<T1> comparator.
+template <typename T1, typename T2, typename T3>
+void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data,
+            const RuntimeShape &output_shape, T2 *output_data)
+{
+  auto pick_greater = std::greater<T1>();
+  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, pick_greater);
+}
+
+// Overload with the same argument layout as other binary ops, so that
+// generated code can call it uniformly. `input2_shape` is accepted and
+// ignored.
+template <typename T1, typename T2, typename T3>
+inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data,
+                   const RuntimeShape &input2_shape, const T3 *input2_data,
+                   const RuntimeShape &output_shape, T2 *output_data)
+{
+  ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// Element-wise select: output[i] = condition[i] ? x[i] : y[i].
+//
+// All four shapes must have matching flat sizes, except that any mix of
+// scalar and one-element tensors is accepted and treated as one element.
+template <typename D, typename T>
+void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+            const RuntimeShape &input_x_shape, const T *input_x_data,
+            const RuntimeShape &input_y_shape, const T *input_y_data,
+            const RuntimeShape &output_shape, T *output_data)
+{
+  const bool all_single_element =
+    input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
+    input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1;
+
+  // The strict shape match is only enforced when not every operand is a
+  // single element.
+  const int64_t element_count =
+    all_single_element
+      ? 1
+      : MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+
+  for (int64_t idx = 0; idx < element_count; ++idx)
+  {
+    if (input_condition_data[idx])
+    {
+      output_data[idx] = input_x_data[idx];
+    }
+    else
+    {
+      output_data[idx] = input_y_data[idx];
+    }
+  }
+}
+
+// Select with a condition that picks whole slices along dimension 0.
+//
+// condition[i] decides whether slice i of the output is copied from x or
+// from y. With a rank-0 condition shape the slice covers the entire tensor.
+template <typename D, typename T>
+void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                   const RuntimeShape &input_x_shape, const T *input_x_data,
+                   const RuntimeShape &input_y_shape, const T *input_y_data,
+                   const RuntimeShape &output_shape, T *output_data)
+{
+  const int64_t outer_size = input_condition_shape.FlatSize();
+  int64_t slice_elems;
+  if (input_condition_shape.DimensionsCount() != 0)
+  {
+    TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
+    slice_elems = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
+  }
+  else
+  {
+    slice_elems = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
+  }
+
+  for (int64_t slice = 0; slice < outer_size; slice++)
+  {
+    const int64_t start = slice * slice_elems;
+    const T *chosen = input_y_data;
+    if (input_condition_data[slice])
+    {
+      chosen = input_x_data;
+    }
+    memcpy(output_data + start, chosen + start, slice_elems * sizeof(T));
+  }
+}
+
+// Broadcasting select for tensors of rank up to 4.
+//
+// Condition, x and y are broadcast against each other through NdArrayDesc
+// descriptors; the output shape is extended to rank 4 and every output
+// element is computed individually.
+template <typename D, typename T>
+void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                           const RuntimeShape &input_x_shape, const T *input_x_data,
+                           const RuntimeShape &input_y_shape, const T *input_y_data,
+                           const RuntimeShape &output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
+
+  const RuntimeShape out_shape4 = RuntimeShape::ExtendedShape(4, output_shape);
+
+  NdArrayDesc<4> cond_desc;
+  NdArrayDesc<4> x_desc;
+  NdArrayDesc<4> y_desc;
+  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
+                                      &cond_desc, &x_desc, &y_desc);
+
+  // Dimensions follow the Tensorflow naming (batch, row, col, channel), with
+  // the trailing dimension changing most rapidly. The loops are nested so
+  // that the innermost one walks the smallest stride, for the best cache
+  // behavior.
+  for (int batch = 0; batch < out_shape4.Dims(0); ++batch)
+  {
+    for (int row = 0; row < out_shape4.Dims(1); ++row)
+    {
+      for (int col = 0; col < out_shape4.Dims(2); ++col)
+      {
+        for (int ch = 0; ch < out_shape4.Dims(3); ++ch)
+        {
+          const int cond_at = SubscriptToIndex(cond_desc, batch, row, col, ch);
+          const int x_at = SubscriptToIndex(x_desc, batch, row, col, ch);
+          const int y_at = SubscriptToIndex(y_desc, batch, row, col, ch);
+          T &destination = output_data[Offset(out_shape4, batch, row, col, ch)];
+          if (input_condition_data[cond_at])
+          {
+            destination = input_x_data[x_at];
+          }
+          else
+          {
+            destination = input_y_data[y_at];
+          }
+        }
+      }
+    }
+  }
+}
+
+// Writes the coordinates of every true element of the condition tensor.
+//
+// For each element that evaluates to true, in row-major order, one row of
+// `cond_rank` coordinates is appended to `output_data`. Nothing is written
+// when the condition tensor is empty.
+template <typename D, typename T>
+void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                      T *output_data)
+{
+  const size_t size = input_condition_shape.FlatSize();
+  if (size == 0)
+  {
+    // Empty condition tensor: there is nothing to emit.
+    return;
+  }
+  const size_t cond_rank = input_condition_shape.DimensionsCount();
+
+  // Element stride of every dimension.
+  std::vector<int> strides(cond_rank, 0);
+  int remaining = size;
+  for (int d = 0; d < cond_rank; ++d)
+  {
+    remaining = remaining / input_condition_shape.Dims(d);
+    strides[d] = remaining;
+  }
+
+  int rows_written = 0;
+  for (int flat = 0; flat < size; ++flat)
+  {
+    if (!input_condition_data[flat])
+    {
+      continue;
+    }
+    // Decompose the flat (row major) index into one coordinate per dimension.
+    int rest = flat;
+    for (int d = 0; d < cond_rank; ++d)
+    {
+      const int coord = rest / strides[d];
+      output_data[rows_written * cond_rank + d] = coord;
+      rest %= strides[d];
+    }
+    rows_written++;
+  }
+}
+
+// Expands a sparse (indices, values) description into a dense tensor.
+//
+// The output (extended to rank 4) is first filled with `default_value`, then
+// each 4-element index receives a value: always `*values` when
+// `value_is_scalar` is set, otherwise the value at the same position as the
+// index. For easy implementation, the indices is always a vector of size-4
+// vectors.
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values,
+                          T default_value, bool value_is_scalar,
+                          const RuntimeShape &unextended_output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+  const int entry_count = indices.size();
+
+  // Start from a tensor that holds the default value everywhere.
+  const int total_elements = output_shape.FlatSize();
+  for (int e = 0; e < total_elements; ++e)
+  {
+    output_data[e] = default_value;
+  }
+
+  // The scalar and per-entry cases get their own loops so that the flag is
+  // not tested on every iteration.
+  if (!value_is_scalar)
+  {
+    for (int n = 0; n < entry_count; ++n)
+    {
+      const std::vector<TI> &coords = indices[n];
+      TFLITE_DCHECK_EQ(coords.size(), 4);
+      const T entry_value = values[n];
+      output_data[Offset(output_shape, coords[0], coords[1], coords[2], coords[3])] = entry_value;
+    }
+    return;
+  }
+
+  for (int n = 0; n < entry_count; ++n)
+  {
+    const std::vector<TI> &coords = indices[n];
+    TFLITE_DCHECK_EQ(coords.size(), 4);
+    const T entry_value = *values; // only the first value is used.
+    output_data[Offset(output_shape, coords[0], coords[1], coords[2], coords[3])] = entry_value;
+  }
+}
+
+// Element-wise power: output[i] = std::pow(input1[i], input2[i]). All three
+// shapes must have the same flat size.
+template <typename T>
+inline void Pow(const RuntimeShape &input1_shape, const T *input1_data,
+                const RuntimeShape &input2_shape, const T *input2_data,
+                const RuntimeShape &output_shape, T *output_data)
+{
+  const int element_count = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  int idx = 0;
+  while (idx < element_count)
+  {
+    output_data[idx] = std::pow(input1_data[idx], input2_data[idx]);
+    ++idx;
+  }
+}
+
+// Broadcasting element-wise power for tensors of rank up to 4.
+//
+// Both inputs are broadcast against each other through NdArrayDesc
+// descriptors; the output shape is extended to rank 4 and every output
+// element is computed individually with std::pow.
+template <typename T>
+inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data,
+                               const RuntimeShape &unextended_input2_shape, const T *input2_data,
+                               const RuntimeShape &unextended_output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  NdArrayDesc<4> base_desc;
+  NdArrayDesc<4> exponent_desc;
+  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape,
+                                      &base_desc, &exponent_desc);
+
+  for (int batch = 0; batch < output_shape.Dims(0); ++batch)
+  {
+    for (int row = 0; row < output_shape.Dims(1); ++row)
+    {
+      for (int col = 0; col < output_shape.Dims(2); ++col)
+      {
+        for (int ch = 0; ch < output_shape.Dims(3); ++ch)
+        {
+          auto dst = Offset(output_shape, batch, row, col, ch);
+          auto base_at = SubscriptToIndex(base_desc, batch, row, col, ch);
+          auto exponent_at = SubscriptToIndex(exponent_desc, batch, row, col, ch);
+          auto base = input1_data[base_at];
+          auto exponent = input2_data[exponent_at];
+          output_data[dst] = std::pow(base, exponent);
+        }
+      }
+    }
+  }
+}
+
+// Reverses the order of the entries along `axis`.
+//
+// The tensor is viewed as [outer, dims_at_axis, inner]; each contiguous run
+// of `inner` elements is copied to the mirrored position along the axis.
+// `output_shape` is not read.
+template <typename Scalar>
+void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data,
+             const RuntimeShape &output_shape, Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("Reverse");
+
+  // Product of the dimensions in front of the axis.
+  int outer_count = 1;
+  for (int d = 0; d < axis; ++d)
+  {
+    outer_count *= input_shape.Dims(d);
+  }
+
+  // Product of the dimensions behind the axis: the size of one copied run.
+  int run_length = 1;
+  for (int d = axis + 1; d < input_shape.DimensionsCount(); ++d)
+  {
+    run_length *= input_shape.Dims(d);
+  }
+
+  const int axis_extent = input_shape.Dims(axis);
+  for (int outer = 0; outer < outer_count; ++outer)
+  {
+    const int outer_base = outer * axis_extent;
+    for (int pos = 0; pos < axis_extent; ++pos)
+    {
+      const int mirrored = axis_extent - pos - 1;
+      Scalar *dst = output_data + (outer_base + pos) * run_length;
+      const Scalar *src = input_data + (outer_base + mirrored) * run_length;
+      memcpy(dst, src, run_length * sizeof(Scalar));
+    }
+  }
+}
+
+// Reverses, per batch entry, the first seq_lengths[batch] entries along
+// `seq_dim`; entries past that length are copied through unchanged.
+//
+// The tensor is viewed as [outer, D_outer, medium, D_medium, copy], where
+// D_outer is the smaller and D_medium the larger of (batch_dim, seq_dim).
+// Data is moved in contiguous runs of `copy_size` elements.
+// `output_shape` is not read. When batch_dim == seq_dim neither branch below
+// runs and nothing is written to `output_data`.
+template <typename Scalar, typename TS>
+void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim,
+ const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("ReverseSequence");
+
+ // Product of the dimensions in front of the smaller of the two axes.
+ int outer_size = 1;
+ int outer_dim = std::min(batch_dim, seq_dim);
+ int medium_dim = std::max(batch_dim, seq_dim);
+ for (int i = 0; i < outer_dim; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ // Product of the dimensions strictly between the two axes.
+ int medium_size = 1;
+ for (int i = outer_dim + 1; i < medium_dim; ++i)
+ {
+ medium_size *= input_shape.Dims(i);
+ }
+
+ // Product of the dimensions behind the larger axis: one contiguous run.
+ int copy_size = 1;
+ for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_outer_dim = input_shape.Dims(outer_dim);
+ const int dims_at_medium_dim = input_shape.Dims(medium_dim);
+
+ Scalar *output_ptr;
+ if (batch_dim > seq_dim)
+ {
+ // Sequence axis is the outer one (index j), batch axis the inner one
+ // (index q).
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ // Last sequence position that takes part in the reversal for batch q.
+ int sl = seq_lengths[q] - 1;
+ if (j > sl)
+ {
+ // Beyond the sequence length: keep the run where it is.
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ // Inside the sequence: position j goes to position sl - j.
+ const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+ else if (batch_dim < seq_dim)
+ {
+ // Batch axis is the outer one (index j), sequence axis the inner one
+ // (index q).
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ // Last sequence position that takes part in the reversal for batch j.
+ int sl = seq_lengths[j] - 1;
+ const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ if (q > sl)
+ {
+ // Beyond the sequence length: keep the run where it is.
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ // Inside the sequence: position q goes to position sl - q.
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+}
+
+// Accumulates each slice input[i, ...] into output[segment_ids_data[i], ...].
+//
+// The output buffer is zero-filled first. The slice length is the flat size of input_shape with
+// dimension 0 skipped, as returned by MatchingFlatSizeSkipDim against output_shape.
+// segment_ids_shape is not consulted, and segment ids are used as output row indices without
+// any range check.
+template <typename T>
+inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data,
+                       const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data,
+                       const RuntimeShape &output_shape, T *output_data)
+{
+  const int row_len = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
+
+  memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
+
+  for (int row = 0; row < input_shape.Dims(0); ++row)
+  {
+    const int dst_base = segment_ids_data[row] * row_len;
+    const int src_base = row * row_len;
+    for (int k = 0; k < row_len; ++k)
+    {
+      output_data[dst_base + k] += input_data[src_base + k];
+    }
+  }
+}
+
+} // namespace reference_ops
+} // namespace tflite
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
--- /dev/null
+# initialize_pal()
+#
+# Locates the third-party sources this PAL needs and sets PAL_INITIALIZED to TRUE once every
+# *_FOUND check below has passed. If a check fails, a STATUS message is printed and return() is
+# executed before PAL_INITIALIZED is set.
+#
+# This is a macro, not a function: per the CMake documentation return() inside a macro acts in
+# the scope of the caller, and every variable set here is visible to the caller.
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 REQUIRED)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 REQUIRED)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 REQUIRED)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 REQUIRED)
+ nnas_find_package(CMSIS-NN EXACT 4.0.0 REQUIRED)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ # NOTE(review): no nnas_find_package(CMSISSource ...) call is visible in this macro, so
+ # CMSISSource_FOUND is presumably set by the CMSIS-NN package config or by the caller - confirm.
+ if (NOT CMSISSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: CMSISSource not found")
+ return()
+ endif ()
+
+ if (NOT CMSIS_NNSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: CMSIS-NN not found")
+ return()
+ endif ()
+
+ # Reached only when none of the checks above returned.
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+# add_pal_to_target(<TGT>)
+#
+# Gives target TGT the PAL and TensorFlow-family include directories, builds the static library
+# luci_interpreter_cmsisnn_pal from the CMSIS-NN sources plus three TensorFlow Lite utility
+# sources, adds the CMSIS-NN source tree as a subdirectory and links the library into TGT.
+macro(add_pal_to_target TGT)
+  # Include paths for the consuming target.
+  target_include_directories(${TGT} PRIVATE "${PAL}")
+  target_include_directories(${TGT} PRIVATE
+    "${TensorFlowRuySource_DIR}"
+    "${TensorFlowGEMMLowpSource_DIR}"
+    "${TensorFlowEigenSource_DIR}"
+    "${TensorFlowSource_DIR}")
+  target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+  # CMSIS-NN kernel sources, collected per function group.
+  file(GLOB_RECURSE PAL_SOURCES
+    "${CMSIS_NNSource_DIR}/Source/ActivationFunctions/*.c"
+    "${CMSIS_NNSource_DIR}/Source/BasicMathFunctions/*.c"
+    "${CMSIS_NNSource_DIR}/Source/ConcatenationFunctions/*.c"
+    "${CMSIS_NNSource_DIR}/Source/ConvolutionFunctions/*.c"
+    "${CMSIS_NNSource_DIR}/Source/FullyConnectedFunctions/*.c"
+    "${CMSIS_NNSource_DIR}/Source/LSTMFunctions/*.c"
+    "${CMSIS_NNSource_DIR}/Source/NNSupportFunctions/*.c"
+    "${CMSIS_NNSource_DIR}/Source/PoolingFunctions/*.c"
+    "${CMSIS_NNSource_DIR}/Source/ReshapeFunctions/*.c"
+    "${CMSIS_NNSource_DIR}/Source/SoftmaxFunctions/*.c")
+
+  # TensorFlow Lite utility sources compiled into the same library.
+  set(PAL_SOURCES ${PAL_SOURCES}
+    ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+    ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+    ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+
+  add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
+  set_target_properties(luci_interpreter_cmsisnn_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+  target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+    "${TensorFlowRuySource_DIR}"
+    "${TensorFlowGEMMLowpSource_DIR}"
+    "${TensorFlowEigenSource_DIR}"
+    "${TensorFlowSource_DIR}"
+    "${CMSIS_NNSource_DIR}"
+  )
+
+  # CMSIS_PATH is cached before the CMSIS-NN tree is added as a subdirectory.
+  set(CMSIS_PATH ${CMSISSource_DIR} CACHE INTERNAL "CMSIS_PATH")
+  add_subdirectory(${CMSIS_NNSource_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
+
+  # Public CMSIS headers propagate to whatever links the PAL library.
+  target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
+    "${CMSISSource_DIR}/CMSIS/DSP/Include"
+    "${CMSISSource_DIR}/CMSIS/Core/Include"
+    "${CMSIS_NNSource_DIR}/Include")
+
+  target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal)
+endmacro()
--- /dev/null
+REGISTER_KERNEL(ADD, Add)
+REGISTER_KERNEL(ARG_MAX, ArgMax)
+REGISTER_KERNEL(AVERAGE_POOL_2D, AveragePool2D)
+REGISTER_KERNEL(BATCH_MATMUL, BatchMatMul)
+REGISTER_KERNEL(BATCH_TO_SPACE_ND, BatchToSpaceND)
+REGISTER_KERNEL(CAST, Cast)
+REGISTER_KERNEL(CONCATENATION, Concatenation)
+REGISTER_KERNEL(CONV_2D, Conv2D)
+REGISTER_KERNEL(DEPTH_TO_SPACE, DepthToSpace)
+REGISTER_KERNEL(DEPTHWISE_CONV_2D, DepthwiseConv2D)
+REGISTER_KERNEL(DEQUANTIZE, Dequantize)
+REGISTER_KERNEL(DIV, Div)
+REGISTER_KERNEL(ELU, Elu)
+REGISTER_KERNEL(EXP, Exp)
+REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)
+REGISTER_KERNEL(FILL, Fill)
+REGISTER_KERNEL(FLOOR, Floor)
+REGISTER_KERNEL(FLOOR_DIV, FloorDiv)
+REGISTER_KERNEL(EQUAL, Equal)
+REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)
+REGISTER_KERNEL(GATHER, Gather)
+REGISTER_KERNEL(GREATER, Greater)
+REGISTER_KERNEL(GREATER_EQUAL, GreaterEqual)
+REGISTER_KERNEL(IF, If)
+REGISTER_KERNEL(INSTANCE_NORM, InstanceNorm)
+REGISTER_KERNEL(L2_NORMALIZATION, L2Normalize)
+REGISTER_KERNEL(L2_POOL_2D, L2Pool2D)
+REGISTER_KERNEL(LEAKY_RELU, LeakyRelu)
+REGISTER_KERNEL(LESS, Less)
+REGISTER_KERNEL(LESS_EQUAL, LessEqual)
+REGISTER_KERNEL(LOCAL_RESPONSE_NORMALIZATION, LocalResponseNormalization)
+REGISTER_KERNEL(LOGICAL_AND, LogicalAnd)
+REGISTER_KERNEL(LOGICAL_NOT, LogicalNot)
+REGISTER_KERNEL(LOGICAL_OR, LogicalOr)
+REGISTER_KERNEL(LOGISTIC, Logistic)
+REGISTER_KERNEL(LOG_SOFTMAX, LogSoftmax)
+REGISTER_KERNEL(MAXIMUM, Maximum)
+REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)
+REGISTER_KERNEL(MEAN, Mean)
+REGISTER_KERNEL(MINIMUM, Minimum)
+REGISTER_KERNEL(MIRROR_PAD, MirrorPad)
+REGISTER_KERNEL(MUL, Mul)
+REGISTER_KERNEL(NEG, Neg)
+REGISTER_KERNEL(NOT_EQUAL, NotEqual)
+REGISTER_KERNEL(ONE_HOT, OneHot)
+REGISTER_KERNEL(PACK, Pack)
+REGISTER_KERNEL(PAD, Pad)
+REGISTER_KERNEL(PADV2, PadV2)
+REGISTER_KERNEL(POW, Pow)
+REGISTER_KERNEL(PRELU, PRelu)
+REGISTER_KERNEL(QUANTIZE, Quantize)
+REGISTER_KERNEL(RELU, Relu)
+REGISTER_KERNEL(RELU6, Relu6)
+REGISTER_KERNEL(RESHAPE, Reshape)
+REGISTER_KERNEL(RESIZE_BILINEAR, ResizeBilinear)
+REGISTER_KERNEL(RESIZE_NEAREST_NEIGHBOR, ResizeNearestNeighbor)
+REGISTER_KERNEL(REVERSE_V2, ReverseV2)
+REGISTER_KERNEL(RSQRT, Rsqrt)
+REGISTER_KERNEL(SHAPE, Shape)
+REGISTER_KERNEL(SLICE, Slice)
+REGISTER_KERNEL(SOFTMAX, Softmax)
+REGISTER_KERNEL(SPACE_TO_BATCH_ND, SpaceToBatchND)
+REGISTER_KERNEL(SPACE_TO_DEPTH, SpaceToDepth)
+REGISTER_KERNEL(SPLIT, Split)
+REGISTER_KERNEL(SPLIT_V, SplitV)
+REGISTER_KERNEL(STRIDED_SLICE, StridedSlice)
+REGISTER_KERNEL(SQRT, Sqrt)
+REGISTER_KERNEL(SQUARE, Square)
+REGISTER_KERNEL(SQUARED_DIFFERENCE, SquaredDifference)
+REGISTER_KERNEL(SQUEEZE, Squeeze)
+REGISTER_KERNEL(SUB, Sub)
+REGISTER_KERNEL(SVDF, SVDF)
+REGISTER_KERNEL(TANH, Tanh)
+REGISTER_KERNEL(TRANSPOSE, Transpose)
+REGISTER_KERNEL(TRANSPOSE_CONV, TransposeConv)
+REGISTER_KERNEL(UNPACK, Unpack)
+REGISTER_KERNEL(WHILE, While)
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FILL_H
+#define LUCI_INTERPRETER_PAL_FILL_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+#endif // LUCI_INTERPRETER_PAL_FILL_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+// Integer SVDF for this PAL: forwards every argument unchanged to
+// tflite::reference_ops::EvalIntegerSVDF, which takes the parameter struct by pointer.
+//
+// Fix: the `params` reference had been mangled into a pilcrow character ("&para" read as an
+// HTML entity) in both the signature and the call, which does not compile.
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+  tflite::reference_ops::EvalIntegerSVDF(&params, input_shape, input_data, weight_feature_shape,
+                                         weight_feature_data, weight_time_shape, weight_time_data,
+                                         bias_shape, bias_data, activation_state_data, output_shape,
+                                         output_data, scratchpad_data, output_temp_data, scale_1_a,
+                                         scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp);
+}
+// Float SVDF for this PAL: forwards every argument unchanged to
+// tflite::reference_ops::EvalFloatSVDF, which takes the parameter struct by pointer.
+//
+// Fix: the `params` reference had been mangled into a pilcrow character ("&para" read as an
+// HTML entity) in both the signature and the call, which does not compile.
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::EvalFloatSVDF(&params, input_shape, input_data, weight_feature_shape,
+                                       weight_feature_data, weight_time_shape, weight_time_data,
+                                       bias_shape, bias_data, scratchpad_data,
+                                       activation_state_data, output_shape, output_data);
+}
+
+// Sizes the SVDF scratchpad tensors used by this PAL.
+//
+// scratchpad_1 is always resized to {batch_size, num_filters}; scratchpad_2 is resized to
+// {batch_size, num_units} only for S8 input. A FLOAT32 input combined with S8/U8 feature weights
+// (the hybrid case) trips an assert. input_shape, weight_time_shape and scratchpad_3..6 are not
+// used here.
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+  // Silence unused-parameter warnings for arguments this implementation does not need.
+  (void)input_shape;
+  (void)weight_time_shape;
+  (void)scratchpad_3;
+  (void)scratchpad_4;
+  (void)scratchpad_5;
+  (void)scratchpad_6;
+
+  const bool float_input = input_data_type == luci_interpreter::DataType::FLOAT32;
+  const bool integer_weights = weight_feature_data_type == luci_interpreter::DataType::S8 ||
+                               weight_feature_data_type == luci_interpreter::DataType::U8;
+  if (float_input && integer_weights)
+  {
+    assert(false && "Hybrid type is not currently supported for linux platform");
+  }
+
+  scratchpad_1->resize({batch_size, num_filters});
+
+  // Only the full-integer kernel needs the second scratchpad.
+  if (input_data_type == luci_interpreter::DataType::S8)
+  {
+    scratchpad_2->resize({batch_size, num_units});
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
--- /dev/null
+# initialize_pal()
+#
+# Looks up the TensorFlow, gemmlowp, Eigen and Ruy source packages (QUIET) plus the Threads
+# package (REQUIRED) and sets PAL_INITIALIZED to TRUE once every *_FOUND check has passed.
+# If a check fails, a STATUS message is printed and return() is executed before
+# PAL_INITIALIZED is set.
+#
+# This is a macro, not a function: per the CMake documentation return() inside a macro acts in
+# the scope of the caller, and every variable set here is visible to the caller.
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ # Provides the Threads::Threads target that add_pal_to_target links against.
+ find_package(Threads REQUIRED)
+
+ # Reached only when none of the checks above returned.
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+# add_pal_to_target(<TGT>)
+#
+# Gives target TGT the PAL and TensorFlow-family include directories (the latter as SYSTEM
+# includes), builds the static library luci_interpreter_linux_pal_micro from TensorFlow Lite
+# utility sources (plus NEON and Ruy sources when BUILD_ARM32_NEON is set) and links it, together
+# with Threads::Threads, into TGT.
+macro(add_pal_to_target TGT)
+  target_include_directories(${TGT} PRIVATE "${PAL}")
+  target_include_directories(${TGT} SYSTEM PRIVATE
+    "${TensorFlowRuySource_DIR}"
+    "${TensorFlowGEMMLowpSource_DIR}"
+    "${TensorFlowEigenSource_DIR}"
+    "${TensorFlowSource_DIR}")
+  target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+  # TODO put it back, I changed my mind.
+  # instead add sources with visitors in this library
+  set(PAL_SOURCES
+    ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+    ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
+    ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+
+  if(BUILD_ARM32_NEON)
+    # NOTE may need to revise this list for version upgrade
+    list(APPEND PAL_SOURCES
+      ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+      ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
+      ${TensorFlowRuySource_DIR}/ruy/allocator.cc
+      ${TensorFlowRuySource_DIR}/ruy/block_map.cc
+      ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
+      ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
+      ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
+      ${TensorFlowRuySource_DIR}/ruy/ctx.cc
+      ${TensorFlowRuySource_DIR}/ruy/denormal.cc
+      ${TensorFlowRuySource_DIR}/ruy/frontend.cc
+      ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
+      ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
+      ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
+      ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
+      ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
+      ${TensorFlowRuySource_DIR}/ruy/trmul.cc
+      ${TensorFlowRuySource_DIR}/ruy/tune.cc
+      ${TensorFlowRuySource_DIR}/ruy/wait.cc
+      ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc
+    )
+  endif()
+
+  add_library(luci_interpreter_linux_pal_micro STATIC ${PAL_SOURCES})
+  set_property(TARGET luci_interpreter_linux_pal_micro PROPERTY POSITION_INDEPENDENT_CODE ON)
+  target_include_directories(luci_interpreter_linux_pal_micro SYSTEM PRIVATE
+    "${TensorFlowRuySource_DIR}"
+    "${TensorFlowGEMMLowpSource_DIR}"
+    "${TensorFlowEigenSource_DIR}"
+    "${TensorFlowSource_DIR}"
+  )
+
+  target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal_micro)
+endmacro()
--- /dev/null
+REGISTER_KERNEL(FULLY_CONNECTED, FullyConnected)
+REGISTER_KERNEL(CONV_2D, Conv2D)
+REGISTER_KERNEL(LOGISTIC, Logistic)
+REGISTER_KERNEL(EXPAND_DIMS, ExpandDims)
+REGISTER_KERNEL(RESHAPE, Reshape)
+REGISTER_KERNEL(MAX_POOL_2D, MaxPool2D)
+REGISTER_KERNEL(CONCATENATION, Concatenation)
+REGISTER_KERNEL(SOFTMAX, Softmax)
+REGISTER_KERNEL(UNIDIRECTIONAL_SEQUENCE_LSTM, UnidirectionalSequenceLSTM)
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_APPLY_ACTIVATION_TO_VECTOR_H
+#define LUCI_INTERPRETER_PAL_APPLY_ACTIVATION_TO_VECTOR_H
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdlib>
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+
+namespace luci_interpreter_pal
+{
+
+// Dynamic (non-fused) activation functor. perhaps it is worth having
+// template instantiation?
+// TODO(aselle): Make this more efficient by pulling the switch to conv_eval
+// using template inlining.
+// Applies the activation selected at construction time to a single float.
+//
+// Supported: kTfLiteActNone, kTfLiteActRelu, kTfLiteActRelu6, kTfLiteActTanh and
+// kTfLiteActSigmoid. Any other value is a programming error.
+class ActivationFunctor
+{
+public:
+  explicit ActivationFunctor(TfLiteFusedActivation act) : act_(act) {}
+
+  // Returns the activation of `a`. For an unsupported activation the assert fires in debug
+  // builds and the process aborts in builds compiled with NDEBUG.
+  float operator()(float a) const
+  {
+    switch (act_)
+    {
+      case kTfLiteActNone:
+        return a;
+      case kTfLiteActRelu:
+        return a < 0.f ? 0.f : a;
+      case kTfLiteActRelu6:
+        return std::max(0.f, std::min(a, 6.f));
+      case kTfLiteActTanh:
+        return std::tanh(a);
+      case kTfLiteActSigmoid:
+        return 1.0f / (1.0f + std::exp(-a));
+      default:
+        assert(false && "Activation functor is not supported");
+        // With NDEBUG the assert above compiles to nothing; without this call control would
+        // flow off the end of a non-void function, which is undefined behaviour.
+        std::abort();
+    }
+  }
+
+private:
+  TfLiteFusedActivation act_;
+};
+
+// Writes activation(vector[i]) to result[i] for i in [0, v_size), in increasing index order.
+inline void ApplyActivationToVector(const float *vector, int v_size,
+                                    TfLiteFusedActivation activation, float *result)
+{
+  const ActivationFunctor apply(activation);
+  for (int idx = 0; idx < v_size; ++idx)
+  {
+    result[idx] = apply(vector[idx]);
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_APPLY_ACTIVATION_TO_VECTOR_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+// BatchToSpaceND for this PAL: hands all arguments, unchanged and in order, to the TFLite
+// reference kernel of the same name.
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32_t *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32_t *crops_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  namespace ref = tflite::reference_ops;
+  ref::BatchToSpaceND(unextended_input1_shape, input1_data, unextended_input2_shape,
+                      block_shape_data, unextended_input3_shape, crops_data,
+                      unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+// Float 2-D convolution for this PAL; forwards to tflite::reference_ops::Conv.
+//
+// scratchpad_shape / scratchpad_data are accepted but not used: the reference kernel is handed
+// an empty RuntimeShape and a null pointer in their place.
+//
+// Fix: the `params` reference had been mangled into a pilcrow character ("&para" read as an
+// HTML entity), which does not compile.
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const float *input_data, const tflite::RuntimeShape &filter_shape,
+                        const float *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const float *bias_data, const tflite::RuntimeShape &output_shape,
+                        float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        float *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data,
+                              tflite::RuntimeShape(), nullptr);
+}
+
+// uint8 2-D convolution for this PAL; forwards to tflite::reference_ops::Conv.
+//
+// scratchpad_shape and scratchpad_data are passed straight through to the reference kernel,
+// followed by a null pointer as its last argument. Because both are forwarded, the former
+// (void) casts on them were misleading and have been dropped.
+//
+// Fix: the `params` reference had been mangled into a pilcrow character ("&para" read as an
+// HTML entity), which does not compile.
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const uint8_t *input_data, const tflite::RuntimeShape &filter_shape,
+                        const uint8_t *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+                        uint8_t *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        uint8_t *scratchpad_data)
+{
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+                              scratchpad_data, nullptr);
+}
+
+// int8 per-channel 2-D convolution for this PAL; forwards to
+// tflite::reference_integer_ops::ConvPerChannel.
+//
+// scratchpad_shape / scratchpad_data are accepted but not used by this implementation.
+//
+// Fix: the `params` reference had been mangled into a pilcrow character ("&para" read as an
+// HTML entity), which does not compile.
+static inline void
+ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult, const int32_t *shifts,
+               const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+               const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+               const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+               const tflite::RuntimeShape &output_shape, int8_t *output_data,
+               const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+  tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+                                                filter_shape, filter_data, bias_shape, bias_data,
+                                                output_shape, output_data);
+}
+
+// Scratchpad setup hook for Conv2D. This implementation does nothing: every argument is
+// explicitly discarded and the scratchpad tensor is left untouched.
+//
+// Fix: the `params` reference had been mangled into a pilcrow character ("&para" read as an
+// HTML entity), which does not compile.
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::ConvParams &params,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  (void)input_data_type;
+  (void)params;
+  (void)input_shape;
+  (void)filter_shape;
+  (void)output_shape;
+  (void)scratchpad;
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+// Generic dequantization for this PAL; forwards to
+// tflite::reference_integer_ops::Dequantize<T>.
+//
+// Fix: the `params` reference had been mangled into a pilcrow character ("&para" read as an
+// HTML entity), which does not compile.
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const T *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+                                               output_data);
+}
+
+// uint8 dequantization overload for this PAL; forwards to tflite::reference_ops::Dequantize
+// rather than the integer_ops variant used by the template overload.
+//
+// Fix: the `params` reference had been mangled into a pilcrow character ("&para" read as an
+// HTML entity), which does not compile.
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FILL_H
+#define LUCI_INTERPRETER_PAL_FILL_H
+
+#include "PALreference_ops.h"
+
+#endif // LUCI_INTERPRETER_PAL_FILL_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "PALreference_ops.h"
+
+namespace luci_interpreter_pal
+{
+// Float-to-T quantization for this PAL; forwards to tflite::reference_ops::AffineQuantize.
+//
+// Fix: the `params` reference had been mangled into a pilcrow character ("&para" read as an
+// HTML entity), which does not compile.
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+                            const tflite::RuntimeShape &input_shape, const float *input_data,
+                            const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+// Requantization for this PAL: hands all arguments, unchanged and in order, to
+// tflite::reference_ops::Requantize.
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+                              int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+                              int32_t input_zero_point, int32_t output_zero_point,
+                              Output *output_data)
+{
+  namespace ref = tflite::reference_ops;
+  ref::Requantize(input_data, size, effective_scale_multiplier, effective_scale_shift,
+                  input_zero_point, output_zero_point, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+// Bilinear resize for this PAL: hands all arguments, unchanged and in order, to
+// tflite::reference_ops::ResizeBilinear.
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+               const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+               const tflite::RuntimeShape &output_size_shape, const int32_t *output_size_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  namespace ref = tflite::reference_ops;
+  ref::ResizeBilinear(op_params, unextended_input_shape, input_data, output_size_shape,
+                      output_size_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+// Nearest-neighbour resize for this PAL: hands all arguments, unchanged and in order, to
+// tflite::reference_ops::ResizeNearestNeighbor.
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+                      const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+                      const tflite::RuntimeShape &output_size_shape,
+                      const int32_t *output_size_data,
+                      const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  namespace ref = tflite::reference_ops;
+  ref::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data, output_size_shape,
+                             output_size_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+// Full-integer SVDF step for the MCU PAL: int8 input/output, int8 feature weights, int16 time
+// weights and int16 activation state.
+//
+// Sizes are derived as follows:
+//   n_batch  = input_shape.Dims(0)            n_input  = input_shape.Dims(1)
+//   n_filter = weight_feature_shape.Dims(0)   n_memory = weight_time_shape.Dims(1)
+//   n_unit   = n_filter / params.rank
+//
+// Buffers written by this function:
+//   activation_state_data : n_batch * n_filter * n_memory values; shifted left by one element and
+//                           then refreshed in the last slot of every (batch, filter) row
+//   scratchpad_data       : n_batch * n_filter int32 accumulators (time-weighted sums)
+//   output_temp_data      : n_batch * n_unit int32 accumulators (bias + rank reduction)
+//   output_data           : n_batch * n_unit int8 results
+//
+// (scale_1_a, scale_1_b) rescale the feature dot products before they are saturated to int16;
+// (scale_2_a, scale_2_b) rescale the reduced sums, after which output_zp is added and the result
+// is saturated to int8. bias_data may be null, in which case accumulation starts from zero.
+// bias_shape and output_shape are accepted for interface compatibility but are not read.
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+  (void)bias_shape;
+  (void)output_shape;
+
+  const int n_rank = params.rank;
+  const int n_batch = input_shape.Dims(0);
+  const int n_input = input_shape.Dims(1);
+  const int n_filter = weight_feature_shape.Dims(0);
+  const int n_unit = n_filter / n_rank;
+  const int n_memory = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state by one element.
+  // The loop is bounded by the element count so that an empty state is simply skipped.
+  {
+    const int n_state = n_batch * n_filter * n_memory;
+    for (int i = 1; i < n_state; ++i)
+    {
+      activation_state_data[i - 1] = activation_state_data[i];
+    }
+  }
+
+  // Note: no need to clear the latest activation, matmul is not accumulative.
+
+  // Feature matmul: one dot product per (batch, filter), stored in the last memory slot.
+  {
+    const int32_t output_max = std::numeric_limits<int16_t>::max();
+    const int32_t output_min = std::numeric_limits<int16_t>::min();
+    int16_t *result_in_batch = activation_state_data + (n_memory - 1);
+    for (int b = 0; b < n_batch; b++)
+    {
+      // The weight matrix is re-read from its start for every batch.
+      const int8_t *matrix_ptr = weight_feature_data;
+      for (int r = 0; r < n_filter; r++)
+      {
+        int32_t dot_prod = 0;
+        const int8_t *vector_in_batch = input_data + b * n_input;
+        for (int c = 0; c < n_input; c++)
+        {
+          dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
+        }
+        dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
+        dot_prod = std::min(std::max(output_min, dot_prod), output_max);
+        // This assumes state is symmetrically quantized. Otherwise the last slot of the
+        // state should be initialized to its zero point and accumulate the dot_prod.
+        *result_in_batch = dot_prod;
+        result_in_batch += n_memory;
+      }
+    }
+  }
+
+  // Time: per (batch, filter) dot product of the time weights with the state row.
+  {
+    for (int b = 0; b < n_batch; ++b)
+    {
+      int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+      // The time weights are shared by all batches; the state advances per batch.
+      const int16_t *vector1_ptr = weight_time_data;
+      const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
+
+      for (int i = 0; i < n_filter; i++)
+      {
+        *scratch_ptr_batch = 0;
+        for (int j = 0; j < n_memory; j++)
+        {
+          *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+        }
+        scratch_ptr_batch++;
+      }
+    }
+  }
+
+  // Reduce, add bias, rescale, activation.
+  {
+    // Seed the accumulators with the bias (or with zero when there is no bias).
+    if (bias_data)
+    {
+      for (int i = 0; i < n_batch; ++i)
+      {
+        int32_t *output_ptr = output_temp_data + i * n_unit;
+        const int32_t *bias_ptr = bias_data;
+        for (int j = 0; j < n_unit; ++j)
+        {
+          *output_ptr++ = *bias_ptr++;
+        }
+      }
+    }
+    else
+    {
+      int32_t *output_ptr = output_temp_data;
+      for (int i = 0; i < n_batch * n_unit; ++i)
+      {
+        *output_ptr++ = 0;
+      }
+    }
+
+    // Reduce: every unit sums n_rank consecutive scratch entries.
+    for (int b = 0; b < n_batch; ++b)
+    {
+      int32_t *output_temp_ptr = output_temp_data + b * n_unit;
+      int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+      for (int i = 0; i < n_unit; ++i)
+      {
+        for (int j = 0; j < n_rank; ++j)
+        {
+          output_temp_ptr[i] += *scratch_ptr_batch++;
+        }
+      }
+    }
+
+    // Rescale to the output scale, add the output zero point and saturate to int8.
+    const int32_t output_max = std::numeric_limits<int8_t>::max();
+    const int32_t output_min = std::numeric_limits<int8_t>::min();
+    for (int i = 0; i < n_batch * n_unit; ++i)
+    {
+      int32_t x1 = output_temp_data[i];
+      int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
+      int32_t x3 = x2 + output_zp;
+      int32_t x4 = std::min(std::max(output_min, x3), output_max);
+      output_data[i] = static_cast<int8_t>(x4);
+    }
+  }
+}
+// Float SVDF step for the MCU PAL.
+//
+// Sizes are derived as follows:
+//   batch_size  = input_shape.Dims(0)            input_size  = input_shape.Dims(1)
+//   num_filters = weight_feature_shape.Dims(0)   memory_size = weight_time_shape.Dims(1)
+//   num_units   = num_filters / params.rank
+//
+// The activation state (batch_size * num_filters * memory_size floats) is shifted left by one
+// element, the feature dot products are written into the last slot of every (batch, filter)
+// row, and the time weights, bias and params.activation are then applied by
+// tflite::reference_ops::ApplyTimeWeightsBiasAndActivation, which receives scratchpad_data and
+// output_data. bias_shape and output_shape are accepted for interface compatibility but are
+// not read.
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  (void)bias_shape;
+  (void)output_shape;
+
+  const int32_t rank = params.rank;
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t input_size = input_shape.Dims(1);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t num_units = num_filters / rank;
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state by one element.
+  // The loop is bounded by the element count so that an empty state is simply skipped.
+  {
+    const int32_t state_size = batch_size * num_filters * memory_size;
+    for (int32_t i = 1; i < state_size; ++i)
+    {
+      activation_state_data[i - 1] = activation_state_data[i];
+    }
+  }
+
+  // Note: no need to clear the latest activation, matmul is not accumulative.
+
+  // Compute conv1d(inputs, weights_feature).
+  // The activation_state's rightmost column is used to save current cycle
+  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+  // having the stride equal to memory_size.
+  {
+    float *result_in_batch = &activation_state_data[memory_size - 1];
+    for (int i = 0; i < batch_size; ++i)
+    {
+      // The weight matrix is re-read from its start for every batch.
+      const float *matrix_ptr = weight_feature_data;
+      for (int j = 0; j < num_filters; ++j)
+      {
+        float dot_prod = 0.0f;
+        const float *vector_in_batch = input_data + i * input_size;
+        for (int k = 0; k < input_size; ++k)
+        {
+          dot_prod += *matrix_ptr++ * *vector_in_batch++;
+        }
+        *result_in_batch = dot_prod;
+        result_in_batch += memory_size;
+      }
+    }
+  }
+
+  tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+    batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+    params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+// Sizes the scratchpad tensors that the MCU SVDF kernels need.
+//
+// scratchpad_1 is always resized to {batch_size, num_filters}; scratchpad_2 is resized to
+// {batch_size, num_units} only when the input is S8 (full-integer kernel). A FLOAT32 input
+// combined with S8/U8 feature weights (hybrid) trips an assert. scratchpad_3..6, input_shape
+// and weight_time_shape are not used by this implementation.
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+  // Arguments kept only for signature compatibility.
+  static_cast<void>(input_shape);
+  static_cast<void>(weight_time_shape);
+  static_cast<void>(scratchpad_3);
+  static_cast<void>(scratchpad_4);
+  static_cast<void>(scratchpad_5);
+  static_cast<void>(scratchpad_6);
+
+  const bool is_hybrid = input_data_type == luci_interpreter::DataType::FLOAT32 &&
+                         (weight_feature_data_type == luci_interpreter::DataType::S8 ||
+                          weight_feature_data_type == luci_interpreter::DataType::U8);
+  if (is_hybrid)
+  {
+    assert(false && "Hybrid type is not currently supported for mcu platform");
+  }
+
+  // Accumulator buffer shared by the float and the integer kernels.
+  scratchpad_1->resize({batch_size, num_filters});
+
+  // Extra accumulator buffer used by the full-integer kernel only.
+  const bool is_full_integer = input_data_type == luci_interpreter::DataType::S8;
+  if (is_full_integer)
+  {
+    scratchpad_2->resize({batch_size, num_units});
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+
+namespace luci_interpreter_pal
+{
+// Intentionally a no-op on MCU: every argument is ignored and nothing is written to `data`.
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+                                              float beta)
+{
+  static_cast<void>(data);
+  static_cast<void>(input_scale);
+  static_cast<void>(beta);
+}
+
+// Placeholder: softmax parameter initialisation is not implemented for MCU yet.
+// Trips an assert when assertions are enabled; the arguments are otherwise ignored.
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+  static_cast<void>(params);
+  static_cast<void>(input_scale);
+  static_cast<void>(beta);
+  // TODO Impl it
+  assert(false && "Softmax NYI");
+}
+
+// Placeholder: Softmax is not supported on MCU at the moment.
+// Trips an assert when assertions are enabled; no input is read and output_data is left
+// untouched.
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+                           const tflite::RuntimeShape &input_shape, const T *input_data,
+                           const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // TODO Impl it
+  assert(false && "Softmax NYI");
+  (void)params;
+  (void)input_shape;
+  (void)input_data;
+  (void)output_shape;
+  (void)output_data;
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+// MCU PAL entry point for SpaceToBatchND: forwards every argument unchanged to
+// tflite::reference_ops::SpaceToBatchND.
+//   input1 - data tensor, input2 - block shape, input3 - paddings
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+               const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32_t *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32_t *paddings_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::SpaceToBatchND(
+    params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+    unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+#define LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+
+namespace luci_interpreter_pal
+{
+namespace lstm_internal
+{
+namespace
+{
+// Fused activation selectors used by the LSTM gate helpers in this header.
+enum TfLiteFusedActivation
+{
+  kTfLiteActNone = 0,
+  kTfLiteActRelu = 1,
+  kTfLiteActReluN1To1 = 2, // min(max(-1, x), 1)
+  kTfLiteActRelu6 = 3,     // min(max(0, x), 6)
+  kTfLiteActTanh = 4,
+  kTfLiteActSignBit = 5,
+  kTfLiteActSigmoid = 6,
+};
+
+} // namespace
+
+// Clamps x into [output_activation_min, output_activation_max]: the lower bound is applied
+// first, then the upper bound.
+template <typename T>
+inline T activationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
+{
+  using std::max;
+  using std::min;
+  const T lower_bounded = max(x, output_activation_min);
+  return min(lower_bounded, output_activation_max);
+}
+
+// Element-wise product of input1_data and input2_data written to output_data, with every
+// product clamped to [params->quantized_activation_min, params->quantized_activation_max].
+// The element count comes from input1_shape.FlatSize(); input2_shape and output_shape are
+// not read.
+// NOTE(review): the quantized_* bounds are converted to T, so a float instantiation clamps
+// with them as well - confirm the caller fills them accordingly.
+template <typename T>
+inline void mul(const luci_interpreter::lstm::ArithmeticParams *params,
+                const tflite::RuntimeShape &input1_shape, const T *input1_data,
+                const tflite::RuntimeShape &input2_shape, const T *input2_data,
+                const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  const T lower_bound = params->quantized_activation_min;
+  const T upper_bound = params->quantized_activation_max;
+
+  const T *lhs = input1_data;
+  const T *rhs = input2_data;
+  T *dst = output_data;
+  for (int remaining = input1_shape.FlatSize(); remaining > 0; --remaining)
+  {
+    *dst = activationFunctionWithMinMax(*lhs * *rhs, lower_bound, upper_bound);
+    ++lhs;
+    ++rhs;
+    ++dst;
+  }
+}
+
+#ifndef DIS_QUANT
+// Fixed-point rescale of x: a positive `shift` is applied as a left shift before the
+// saturating rounding doubling high multiplication by quantized_multiplier, a non-positive
+// `shift` as a rounding right shift afterwards.
+inline int32_t multiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
+{
+  int left_shift = 0;
+  int right_shift = 0;
+  if (shift > 0)
+  {
+    left_shift = shift;
+  }
+  else
+  {
+    right_shift = -shift;
+  }
+
+  const int32_t pre_shifted = x * (1 << left_shift);
+  const int32_t high_mul =
+    gemmlowp::SaturatingRoundingDoublingHighMul(pre_shifted, quantized_multiplier);
+  return gemmlowp::RoundingDivideByPOT(high_mul, right_shift);
+}
+
+// Quantized fully connected layer shared by the integer LSTM paths.
+//
+// For every batch row b and output channel out_c it accumulates
+//   (filter[out_c][d] + weights_offset) * (input[b][d] + input_offset)   over d,
+// adds bias_data[out_c] when bias_data is non-null, rescales the sum with
+// (output_multiplier, output_shift), adds output_offset and clamps the result to
+// [quantized_activation_min, quantized_activation_max] before casting it to OutputType.
+//
+// batches is the flat size of output_shape without its last dimension, output_depth is the
+// last dimension of output_shape and accum_depth is the last dimension of filter_shape.
+// input_shape and bias_shape are accepted for interface compatibility but are not read.
+//
+// NOTE(review): the accumulator has type BiasType, yet multiplyByQuantizedMultiplier takes an
+// int32_t, so an int64_t accumulator is narrowed before rescaling - confirm this is intended
+// for the int16-activation path.
+template <typename InputType, typename WeightType, typename OutputType, typename BiasType>
+void fullyConnectedInteger(const tflite::FullyConnectedParams &params,
+                           const tflite::RuntimeShape &input_shape, const InputType *input_data,
+                           const tflite::RuntimeShape &filter_shape, const WeightType *filter_data,
+                           const tflite::RuntimeShape &bias_shape, const BiasType *bias_data,
+                           const tflite::RuntimeShape &output_shape, OutputType *output_data)
+{
+  (void)input_shape;
+  (void)bias_shape;
+
+  const int32_t input_offset = params.input_offset;
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = output_shape.Dims(output_dim_count - 1);
+  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b)
+  {
+    for (int out_c = 0; out_c < output_depth; ++out_c)
+    {
+      BiasType acc = 0;
+      for (int d = 0; d < accum_depth; ++d)
+      {
+        int32_t input_val = input_data[b * accum_depth + d];
+        int32_t filter_val = filter_data[out_c * accum_depth + d];
+        acc += (filter_val + filter_offset) * (input_val + input_offset);
+      }
+      if (bias_data)
+      {
+        acc += bias_data[out_c];
+      }
+      int32_t acc_scaled = multiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+      acc_scaled += output_offset;
+      acc_scaled = std::max(acc_scaled, output_activation_min);
+      acc_scaled = std::min(acc_scaled, output_activation_max);
+      output_data[out_c + output_depth * b] = static_cast<OutputType>(acc_scaled);
+    }
+  }
+}
+
+// Fully connected layer for int8 activations: int8 input and weights, int32 bias, int16 output.
+// Thin overload that dispatches to fullyConnectedInteger.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void fullyConnected(const tflite::FullyConnectedParams &params,
+                           const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                           const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+                           const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                           const tflite::RuntimeShape &output_shape, int16_t *output_data)
+{
+  return fullyConnectedInteger(params, input_shape, input_data, filter_shape, filter_data,
+                               bias_shape, bias_data, output_shape, output_data);
+}
+
+// Fully connected layer for int16 activations: int16 input, int8 weights, int64 bias, int16
+// output. Thin overload that dispatches to fullyConnectedInteger.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void fullyConnected(const tflite::FullyConnectedParams &params,
+                           const tflite::RuntimeShape &input_shape, const int16_t *input_data,
+                           const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+                           const tflite::RuntimeShape &bias_shape, const int64_t *bias_data,
+                           const tflite::RuntimeShape &output_shape, int16_t *output_data)
+{
+  return fullyConnectedInteger(params, input_shape, input_data, filter_shape, filter_data,
+                               bias_shape, bias_data, output_shape, output_data);
+}
+
+// Quantized element-wise multiplication of `size` elements.
+// Each operand gets its input offset added, the product is rescaled with
+// (output_multiplier, output_shift), output_offset is added and the result is clamped to
+// [quantized_activation_min, quantized_activation_max] before the cast to OutputType.
+template <typename InputType, typename OutputType>
+void mulElementwise(int size, const luci_interpreter::lstm::ArithmeticParams *params,
+                    const InputType *input1_data, const InputType *input2_data,
+                    OutputType *output_data)
+{
+  const InputType *lhs = input1_data;
+  const InputType *rhs = input2_data;
+  OutputType *dst = output_data;
+
+  for (int remaining = size; remaining > 0; --remaining, ++lhs, ++rhs, ++dst)
+  {
+    const int32_t lhs_val = params->input1_offset + *lhs;
+    const int32_t rhs_val = params->input2_offset + *rhs;
+
+    const int32_t rescaled = multiplyByQuantizedMultiplier(
+      lhs_val * rhs_val, params->output_multiplier, params->output_shift);
+    const int32_t shifted = params->output_offset + rescaled;
+
+    const int32_t lower_bounded = std::max(params->quantized_activation_min, shifted);
+    const int32_t clamped = std::min(params->quantized_activation_max, lower_bounded);
+    *dst = static_cast<OutputType>(clamped);
+  }
+}
+
+// Quantized element-wise product of two int16 buffers into an int8 buffer.
+// Input and output have the same shape in LSTM, so one shape supplies the element count.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void mul(const tflite::RuntimeShape &shape,
+                const luci_interpreter::lstm::ArithmeticParams *params, const int16_t *input1_data,
+                const int16_t *input2_data, int8_t *output_data)
+{
+  return mulElementwise<int16_t, int8_t>(shape.FlatSize(), params, input1_data, input2_data,
+                                         output_data);
+}
+
+// Quantized element-wise product of two int16 buffers into an int16 buffer.
+// Input and output have the same shape in LSTM, so one shape supplies the element count.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void mul(const tflite::RuntimeShape &shape,
+                const luci_interpreter::lstm::ArithmeticParams *params, const int16_t *input1_data,
+                const int16_t *input2_data, int16_t *output_data)
+{
+  return mulElementwise(shape.FlatSize(), params, input1_data, input2_data, output_data);
+}
+
+// Saturating element-wise sum of two int16 buffers holding n_batch * n_input elements each.
+// Every sum is computed in int32 and clamped to the int16 range before it is stored, so
+// `output` may alias either input.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void addElementWise(const int16_t *input_1, const int16_t *input_2, int n_batch,
+                           int n_input, int16_t *output)
+{
+  const int32_t int16_max = std::numeric_limits<int16_t>::max();
+  const int32_t int16_min = std::numeric_limits<int16_t>::min();
+
+  for (int batch = 0; batch < n_batch; ++batch)
+  {
+    for (int i = 0; i < n_input; ++i)
+    {
+      const int index = batch * n_input + i;
+      const int32_t sum = input_1[index] + input_2[index];
+      const int32_t sum_clamped = std::min(int16_max, std::max(int16_min, sum));
+      output[index] = static_cast<int16_t>(sum_clamped);
+    }
+  }
+}
+
+// int16 tanh delegated to tflite::reference_integer_ops::Tanh.
+// The input left shift is (15 + cell_state_scale_power) - 3. When that value is negative its
+// magnitude is passed together with input_multiplier 3; otherwise the multiplier is 0.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void tanh(int32_t cell_state_scale_power, const tflite::RuntimeShape &input_data_shape,
+                 int16_t *input_data, const tflite::RuntimeShape &output_data_shape,
+                 int16_t *output_data)
+{
+  int32_t tanh_input_left_shift = (15 + cell_state_scale_power) - 3;
+  int32_t input_multiplier = 0;
+  if (tanh_input_left_shift < 0) /* handling negative shift value */
+  {
+    tanh_input_left_shift = -tanh_input_left_shift;
+    input_multiplier = 3;
+  }
+  tflite::reference_integer_ops::Tanh(input_multiplier, tanh_input_left_shift, input_data_shape,
+                                      input_data, output_data_shape, output_data);
+}
+
+// In-place int16 logistic over data_shape.FlatSize() elements, delegated to
+// tflite::reference_integer_ops::Logistic with input multiplier 0 and input left shift 0.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void sigmoid(const tflite::RuntimeShape &data_shape, int16_t *data)
+{
+  tflite::reference_integer_ops::Logistic(/*input_multiplier=*/0, /*input_left_shift=*/0,
+                                          /*input_size=*/data_shape.FlatSize(),
+                                          /*input=*/data, /*output=*/data);
+}
+
+// Clamps the first v_size elements of `vector` in place to
+// [-quantized_cell_clip, +quantized_cell_clip] taken from cell_state_info.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void clipping(const int v_size, const luci_interpreter::lstm::CellStateInfo *cell_state_info,
+                     int16_t *vector)
+{
+  for (int i = 0; i < v_size; i++)
+  {
+    vector[i] = std::max(std::min(cell_state_info->quantized_cell_clip, vector[i]),
+                         static_cast<int16_t>(-cell_state_info->quantized_cell_clip));
+  }
+}
+#endif // DIS_QUANT
+
+#ifndef DIS_FLOAT
+// Float fully connected layer: forwards every argument unchanged to
+// tflite::reference_ops::FullyConnected.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void fullyConnected(const tflite::FullyConnectedParams &params,
+                           const tflite::RuntimeShape &input_shape, const float *input_data,
+                           const tflite::RuntimeShape &filter_shape, const float *filter_data,
+                           const tflite::RuntimeShape &bias_shape, const float *bias_data,
+                           const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  return tflite::reference_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+                                               filter_data, bias_shape, bias_data, output_shape,
+                                               output_data);
+}
+
+// Float element-wise product, delegated to the shape-aware mul template.
+// Input and output have the same shape in LSTM, so one shape is passed for all three tensors.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void mul(const tflite::RuntimeShape &shape,
+                const luci_interpreter::lstm::ArithmeticParams *params, const float *input1_data,
+                const float *input2_data, float *output_data)
+{
+  return mul(params, shape, input1_data, shape, input2_data, shape, output_data);
+}
+
+// Element-wise sum of two float buffers holding n_batch * n_input elements each.
+// Each element is read before the matching output element is written, so `output` may alias
+// either input.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void addElementWise(const float *input_1, const float *input_2, int n_batch, int n_input,
+                           float *output)
+{
+  for (int batch = 0; batch < n_batch; ++batch)
+  {
+    for (int i = 0; i < n_input; ++i)
+    {
+      const int index = batch * n_input + i;
+      output[index] = input_1[index] + input_2[index];
+    }
+  }
+}
+
+// Float tanh delegated to tflite::reference_ops::Tanh.
+// cell_state_scale_power only matters for the quantized overload and is ignored here.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void tanh(int32_t cell_state_scale_power, const tflite::RuntimeShape &input_data_shape,
+                 float *input_data, const tflite::RuntimeShape &output_data_shape,
+                 float *output_data)
+{
+  (void)cell_state_scale_power;
+  tflite::reference_ops::Tanh(input_data_shape, input_data, output_data_shape, output_data);
+}
+
+// In-place float logistic delegated to tflite::reference_ops::Logistic; `data` is used as
+// both input and output.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void sigmoid(const tflite::RuntimeShape &data_shape, float *data)
+{
+  tflite::reference_ops::Logistic(data_shape, data, data_shape, data);
+}
+
+// Clamps the first v_size elements of `vector` in place to [-cell_clip, +cell_clip] taken
+// from cell_state_info.
+// Declared inline: a non-inline function defined in a header would be emitted by every
+// translation unit that includes it and break the link.
+inline void clipping(const int v_size, const luci_interpreter::lstm::CellStateInfo *cell_state_info,
+                     float *vector)
+{
+  for (int i = 0; i < v_size; i++)
+  {
+    vector[i] =
+      std::max(std::min(cell_state_info->cell_clip, vector[i]), -cell_state_info->cell_clip);
+  }
+}
+#endif // DIS_FLOAT
+
+// Size information about the LSTM kernel, which is deduced from tensors stored
+// in the flat buffer file. Read by LstmStepManager to derive per-step shapes and offsets.
+struct LstmSizeInfo
+{
+ bool time_major; // true: one step covers all batches; false: one batch per step
+ int32_t batch_size; // number of batches
+ int32_t time_steps; // presumably the sequence length - not read in this part of the file
+ int32_t input_dimension; // innermost dimension of the per-step input shape
+ int32_t state_dimension; // innermost dimension of the per-step state/output shape
+};
+
+// Tracks the position of the LSTM evaluation inside the input, output, hidden-state and
+// cell-state buffers while the kernel walks over time steps and batches.
+//
+// With time_major set, one step covers all batches: the input/output offsets advance by
+// dimension * batch_size per time step and the state offsets never move. Otherwise one batch
+// is processed per step: the input/output offsets advance by a single row per time step and
+// updateBatch() moves the state offsets to the next batch.
+class LstmStepManager
+{
+public:
+  LstmStepManager() = delete;
+  // Does not take any ownership: only a reference to size_info is stored, so the referenced
+  // object must outlive the manager.
+  explicit LstmStepManager(const LstmSizeInfo &size_info) : size_info_(size_info) {}
+
+  // Advances the input and output offsets by one time step.
+  void updateTime()
+  {
+    current_time_ += 1;
+    // default as one batch per inference
+    int input_step = size_info_.input_dimension;
+    int output_step = size_info_.state_dimension;
+    // time major: batch inference
+    if (size_info_.time_major)
+    {
+      input_step = input_step * size_info_.batch_size;
+      output_step = output_step * size_info_.batch_size;
+    }
+
+    input_offset_ += input_step;
+    output_offset_ += output_step;
+  }
+
+  // Moves on to the next batch; only affects the state offsets when not time major.
+  void updateBatch()
+  {
+    current_batch_ += 1;
+    TFLITE_DCHECK_LE(current_batch_, size_info_.batch_size);
+    // batch inference for time major: no action needed
+    if (size_info_.time_major)
+    {
+      return;
+    }
+    // otherwise: single batch inference, go to the next batch
+    hidden_state_offset_ += size_info_.state_dimension;
+    cell_state_offset_ += size_info_.state_dimension;
+  }
+
+  // Resets the time counter only; the buffer offsets are left untouched.
+  void resetTime() { current_time_ = 0; }
+
+  // Shape of the input slice consumed by one step: {batch, input_dimension}.
+  tflite::RuntimeShape inputShape() const { return stepShape(size_info_.input_dimension); }
+
+  // Shape of the state slice handled by one step: {batch, state_dimension}.
+  tflite::RuntimeShape stateShape() const { return stepShape(size_info_.state_dimension); }
+
+  int inputOffset() const { return input_offset_; }
+
+  int outputOffset() const { return output_offset_; }
+
+  int hiddenStateOffset() const { return hidden_state_offset_; }
+
+  int cellStateOffset() const { return cell_state_offset_; }
+
+private:
+  // Builds {batch, inner_dimension}, where batch is batch_size when time major and 1 otherwise.
+  // The dimensions are stored as int32_t so they can be handed to RuntimeShape directly,
+  // without reinterpreting an int array (int and int32_t are distinct types on some targets).
+  tflite::RuntimeShape stepShape(int32_t inner_dimension) const
+  {
+    const int32_t batch_size = size_info_.time_major ? size_info_.batch_size : 1;
+    const int32_t dims[2] = {batch_size, inner_dimension};
+    return tflite::RuntimeShape(2, dims);
+  }
+
+  int32_t current_time_ = 0;
+  int32_t current_batch_ = 0;
+  int32_t input_offset_ = 0;
+  int32_t output_offset_ = 0;
+  int32_t hidden_state_offset_ = 0;
+  int32_t cell_state_offset_ = 0;
+
+  const LstmSizeInfo &size_info_;
+};
+
+// Evaluates one LSTM gate:
+//   gate_output = activation(FC(input) + FC(recurrent))
+// The input FC result goes straight into gate_output, the recurrent FC result into
+// fc_output_buffer; both are then summed into gate_output and activated in place.
+// `activation` must be kTfLiteActSigmoid or kTfLiteActTanh; anything else trips an assert.
+// Weights and biases are fetched from runtime_graph as constant tensor data.
+// NOTE(review): lstmStep passes nullptr as recurrent_bias, so the tensor helpers called below
+// are expected to tolerate a null tensor - confirm against their definitions.
+template <typename ActivationType, typename WeightType, typename CellType, typename BiasType>
+void calculateLstmGate(const LstmStepManager *step_info,
+                       const luci_interpreter::lstm::GateParameters *gate_params,
+                       // Input FC
+                       ActivationType *input_data, const circle::Tensor *input_weight,
+                       const circle::Tensor *input_bias,
+                       // Recurrent FC
+                       ActivationType *recurrent_data, const circle::Tensor *recurrent_weight,
+                       const circle::Tensor *recurrent_bias,
+                       // Output
+                       CellType *gate_output,
+                       // Scratch arrays
+                       CellType *fc_output_buffer, const TfLiteFusedActivation activation,
+                       luci_interpreter::BaseRuntimeGraph *runtime_graph)
+{
+  const auto gate_output_shape = step_info->stateShape();
+
+  // gate_output <- FC(input)
+  {
+    const auto &src = gate_params->input_fc_params;
+    tflite::FullyConnectedParams fc_params{};
+    fc_params.input_offset = src.input_offset;
+    fc_params.weights_offset = src.weights_offset;
+    fc_params.output_offset = src.output_offset;
+    fc_params.output_multiplier = src.output_multiplier;
+    fc_params.output_shift = src.output_shift;
+    fc_params.quantized_activation_min = src.quantized_activation_min;
+    fc_params.quantized_activation_max = src.quantized_activation_max;
+    fc_params.float_activation_max = src.float_activation_max;
+    fc_params.float_activation_min = src.float_activation_min;
+
+    ActivationType *step_input = input_data + step_info->inputOffset();
+    fullyConnected(fc_params, step_info->inputShape(), step_input,
+                   luci_interpreter::kernels::getTensorShape(input_weight),
+                   luci_interpreter::kernels::getTensorData<WeightType>(
+                     runtime_graph->getConstDataByTensor(input_weight)),
+                   luci_interpreter::kernels::getTensorShape(input_bias),
+                   luci_interpreter::kernels::getTensorData<BiasType>(
+                     runtime_graph->getConstDataByTensor(input_bias)),
+                   gate_output_shape, gate_output);
+  }
+
+  // fc_output_buffer <- FC(recurrent)
+  {
+    const auto &src = gate_params->recurrent_fc_params;
+    tflite::FullyConnectedParams fc_params{};
+    fc_params.input_offset = src.input_offset;
+    fc_params.weights_offset = src.weights_offset;
+    fc_params.output_offset = src.output_offset;
+    fc_params.output_multiplier = src.output_multiplier;
+    fc_params.output_shift = src.output_shift;
+    fc_params.quantized_activation_min = src.quantized_activation_min;
+    fc_params.quantized_activation_max = src.quantized_activation_max;
+    fc_params.float_activation_max = src.float_activation_max;
+    fc_params.float_activation_min = src.float_activation_min;
+
+    ActivationType *step_hidden_state = recurrent_data + step_info->hiddenStateOffset();
+    fullyConnected(fc_params, step_info->stateShape(), step_hidden_state,
+                   luci_interpreter::kernels::getTensorShape(recurrent_weight),
+                   luci_interpreter::kernels::getTensorData<WeightType>(
+                     runtime_graph->getConstDataByTensor(recurrent_weight)),
+                   luci_interpreter::kernels::getTensorShape(recurrent_bias),
+                   luci_interpreter::kernels::getTensorData<BiasType>(
+                     runtime_graph->getConstDataByTensor(recurrent_bias)),
+                   gate_output_shape, fc_output_buffer);
+  }
+
+  // gate_output <- gate_output + fc_output_buffer
+  addElementWise(gate_output, fc_output_buffer, /*n_batch=*/gate_output_shape.DimsData()[0],
+                 /*n_state=*/gate_output_shape.DimsData()[1], gate_output);
+
+  // gate_output <- activation(gate_output), in place
+  if (activation == TfLiteFusedActivation::kTfLiteActSigmoid)
+  {
+    sigmoid(gate_output_shape, gate_output);
+  }
+  else if (activation == TfLiteFusedActivation::kTfLiteActTanh)
+  {
+    // Set the scale power to -12 to avoid shift
+    tanh(/*cell_state_scale_power=*/-12, gate_output_shape, gate_output, gate_output_shape,
+         gate_output);
+  }
+  else
+  {
+    // Only Sigmoid or Tanh is used.
+    assert(false && "Only Sigmoid or Tanh is used");
+  }
+}
+
+// Refreshes the hidden state of the current step:
+//   hidden_state = tanh(cell_state) * output_gate_output   (element-wise product)
+// tanh(cell_state) is materialised in `buffer`; the product is written at the current hidden
+// state offset of hidden_state_data.
+template <typename CellType, typename ActivationType>
+void updateLstmHidden(const LstmStepManager *step_info, CellType *cell_state_data_base,
+                      ActivationType *hidden_state_data, const CellType *output_gate_output,
+                      const luci_interpreter::lstm::ArithmeticParams *mul_params,
+                      int32_t cell_state_scale_power, CellType *buffer)
+{
+  const auto state_shape = step_info->stateShape();
+  CellType *const step_cell_state = cell_state_data_base + step_info->cellStateOffset();
+  ActivationType *const step_hidden_state = hidden_state_data + step_info->hiddenStateOffset();
+
+  // buffer <- tanh(cell_state)
+  tanh(cell_state_scale_power, state_shape, step_cell_state, state_shape, buffer);
+  // hidden_state <- buffer * output_gate_output
+  mul(state_shape, mul_params, buffer, output_gate_output, step_hidden_state);
+}
+
+// Refreshes the cell state of the current step:
+//   cell_state = forget_gate_output * cell_state + input_gate_output * cell_gate_output
+// where * is the element-wise product. The second product is staged in `buffer`; the result
+// is clipped afterwards when cell_state_info->cell_clip is positive.
+template <typename CellType>
+void updateLstmCell(const LstmStepManager *step_info, CellType *cell_state_data,
+                    // Gate outputs
+                    CellType *forget_gate_output, const CellType *input_gate_output,
+                    const CellType *cell_gate_output,
+                    // Mul parameters
+                    const luci_interpreter::lstm::ArithmeticParams &forget_cell_mul_params,
+                    const luci_interpreter::lstm::ArithmeticParams &input_mul_params,
+                    const luci_interpreter::lstm::CellStateInfo *cell_state_info, CellType *buffer)
+{
+  const auto state_shape = step_info->stateShape();
+  CellType *const step_cell_state = cell_state_data + step_info->cellStateOffset();
+
+  // cell_state <- forget_gate_output * cell_state (in place)
+  mul(state_shape, &forget_cell_mul_params, forget_gate_output, step_cell_state, step_cell_state);
+  // buffer <- input_gate_output * cell_gate_output
+  mul(state_shape, &input_mul_params, input_gate_output, cell_gate_output, buffer);
+
+  // cell_state <- cell_state + buffer
+  addElementWise(step_cell_state, buffer,
+                 /*n_batch=*/state_shape.DimsData()[0],
+                 /*n_state=*/state_shape.DimsData()[1], step_cell_state);
+
+  if (cell_state_info->cell_clip > 0)
+  {
+    clipping(state_shape.FlatSize(), cell_state_info, step_cell_state);
+  }
+}
+
+// Runs one LSTM step at the position described by step_info.
+//
+//   1. forget, input and cell gates are computed into scratch0, scratch1 and scratch2
+//      (scratch3 is the temporary of every gate computation);
+//   2. the cell state is updated in place, with scratch1 reused as the staging buffer;
+//   3. the output gate is computed into scratch1, the hidden state is updated with scratch0
+//      as the tanh buffer, and the new hidden state slice is copied to the output tensor at
+//      the current output offset.
+// No gate uses a recurrent bias (nullptr is passed).
+template <typename ActivationType, typename WeightType, typename CellType, typename BiasType>
+void lstmStep(luci_interpreter::lstm::LSTMStruct *lstm_struct,
+              luci_interpreter::lstm::LSTMParameters *lstm_params, LstmStepManager *step_info,
+              luci_interpreter::lstm::CellStateInfo *cell_state_info,
+              ActivationType *output_state_data, CellType *cell_state_data, CellType *scratch0,
+              CellType *scratch1, CellType *scratch2, CellType *scratch3,
+              luci_interpreter::BaseRuntimeGraph *runtime_graph)
+{
+  // Buffer roles during the gate computations.
+  CellType *const forget_gate = scratch0;
+  CellType *const input_gate = scratch1;
+  CellType *const cell_gate = scratch2;
+  CellType *const gate_scratch = scratch3;
+
+  auto input_data = luci_interpreter::kernels::getTensorData<ActivationType>(
+    runtime_graph->getDataByTensor(lstm_struct->input()));
+
+  /* Step 1: gate outputs needed for the cell state update */
+  // Forget gate
+  calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+    step_info, &lstm_params->forget_gate_parameters, input_data,
+    lstm_struct->input_to_forget_weights(), lstm_struct->forget_gate_bias(), output_state_data,
+    lstm_struct->recurrent_to_forget_weights(), /*recurrent_bias*/ nullptr, forget_gate,
+    gate_scratch, TfLiteFusedActivation::kTfLiteActSigmoid, runtime_graph);
+
+  // Input gate
+  calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+    step_info, &lstm_params->input_gate_parameters, input_data,
+    lstm_struct->input_to_input_weights(), lstm_struct->input_gate_bias(), output_state_data,
+    lstm_struct->recurrent_to_input_weights(), /*recurrent_bias*/ nullptr, input_gate,
+    gate_scratch, TfLiteFusedActivation::kTfLiteActSigmoid, runtime_graph);
+
+  // Cell gate
+  calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+    step_info, &lstm_params->cell_gate_parameters, input_data,
+    lstm_struct->input_to_cell_weights(), lstm_struct->cell_gate_bias(), output_state_data,
+    lstm_struct->recurrent_to_cell_weights(), /*recurrent_bias*/ nullptr, cell_gate, gate_scratch,
+    TfLiteFusedActivation::kTfLiteActTanh, runtime_graph);
+
+  /* Step 2: cell state update; scratch1 doubles as the staging buffer */
+  CellType *const staging_buffer = scratch1;
+  updateLstmCell<CellType>(step_info, cell_state_data, forget_gate, input_gate, cell_gate,
+                           lstm_params->inter_gate_parameters.forget_cell_mul_params,
+                           lstm_params->inter_gate_parameters.input_mul_params, cell_state_info,
+                           staging_buffer);
+
+  /* Step 3: hidden state update; scratch1 and scratch0 are reused */
+  CellType *const output_gate = scratch1;
+  calculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+    step_info, &lstm_params->output_gate_parameters, input_data,
+    lstm_struct->input_to_output_weights(), lstm_struct->output_gate_bias(), output_state_data,
+    lstm_struct->recurrent_to_output_weights(), /*recurrent_bias*/ nullptr, output_gate,
+    gate_scratch, TfLiteFusedActivation::kTfLiteActSigmoid, runtime_graph);
+
+  CellType *const tanh_buffer = scratch0;
+  updateLstmHidden<CellType, ActivationType>(
+    step_info, cell_state_data, output_state_data, output_gate,
+    &lstm_params->inter_gate_parameters.output_mul_params, cell_state_info->cell_state_scale_power,
+    tanh_buffer);
+
+  // Publish the refreshed hidden state slice as this step's output.
+  ActivationType *output_ptr = luci_interpreter::kernels::getTensorData<ActivationType>(
+    runtime_graph->getDataByTensor(lstm_struct->output()));
+  const ActivationType *hidden_state_slice = output_state_data + step_info->hiddenStateOffset();
+  std::memcpy(output_ptr + step_info->outputOffset(), hidden_state_slice,
+              step_info->stateShape().FlatSize() * sizeof(ActivationType));
+}
+
+} // namespace lstm_internal
+
+// Drives the LSTM kernel over the whole input, which may hold several time steps and
+// several batches. One lstmStep() call is issued per position of the step manager.
+template <typename ActivationType, typename WeightType, typename CellType, typename BiasType>
+void evalLSTM(luci_interpreter::lstm::LSTMStruct *lstm_struct,
+              luci_interpreter::lstm::LSTMParameters *lstm_params,
+              luci_interpreter::lstm::CellStateInfo *cell_state_info,
+              ActivationType *output_state_data, CellType *cell_state_data, CellType *scratch0,
+              CellType *scratch1, CellType *scratch2, CellType *scratch3,
+              luci_interpreter::BaseRuntimeGraph *runtime_graph)
+{
+  lstm_internal::LstmSizeInfo size_info;
+  size_info.time_major = lstm_struct->options->time_major();
+
+  // The batch and time axes of the input trade places depending on the layout flag.
+  if (size_info.time_major)
+  {
+    size_info.batch_size = luci_interpreter::Tensor::dim(lstm_struct->input(), 1);
+    size_info.time_steps = luci_interpreter::Tensor::dim(lstm_struct->input(), 0);
+  }
+  else
+  {
+    size_info.batch_size = luci_interpreter::Tensor::dim(lstm_struct->input(), 0);
+    size_info.time_steps = luci_interpreter::Tensor::dim(lstm_struct->input(), 1);
+  }
+  size_info.input_dimension = luci_interpreter::Tensor::dim(lstm_struct->input(), 2);
+  size_info.state_dimension = luci_interpreter::Tensor::dim(lstm_struct->output_state(), 1);
+
+  lstm_internal::LstmStepManager step_info(size_info);
+
+  // Evaluates the current step and then moves the step manager to the next time step.
+  const auto run_step_and_advance = [&]() {
+    lstm_internal::lstmStep<ActivationType, WeightType, CellType, BiasType>(
+      lstm_struct, lstm_params, &step_info, cell_state_info, output_state_data, cell_state_data,
+      scratch0, scratch1, scratch2, scratch3, runtime_graph);
+    step_info.updateTime();
+  };
+
+  if (size_info.time_major)
+  {
+    // Time is the first dimension: batch computation is enabled, so a single pass over the
+    // time steps is enough.
+    for (int step = 0; step < size_info.time_steps; ++step)
+    {
+      run_step_and_advance();
+    }
+    return;
+  }
+
+  // Batch is the first dimension: the input cannot be sliced per time step, so every batch
+  // is evaluated on its own (single batch inference).
+  for (int batch = 0; batch < size_info.batch_size; ++batch)
+  {
+    for (int step = 0; step < size_info.time_steps; ++step)
+    {
+      run_step_and_advance();
+    }
+    // Move on to the next batch and restart its time counter.
+    step_info.updateBatch();
+    step_info.resetTime();
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_UNIDIRECTIONAL_SEQUENCE_LSTM_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "fixedpoint/fixedpoint.h"
+#include "ruy/profiler/instrumentation.h" // from @ruy
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+#include "tensorflow/lite/kernels/internal/reference/add_n.h"
+#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "tensorflow/lite/kernels/internal/reference/cast.h"
+#include "tensorflow/lite/kernels/internal/reference/ceil.h"
+#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/div.h"
+#include "tensorflow/lite/kernels/internal/reference/elu.h"
+#include "tensorflow/lite/kernels/internal/reference/exp.h"
+#include "tensorflow/lite/kernels/internal/reference/fill.h"
+#include "tensorflow/lite/kernels/internal/reference/floor.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/gather.h"
+#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
+#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
+#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/neg.h"
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reduce.h"
+#include "tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
+#include "tensorflow/lite/kernels/internal/reference/round.h"
+#include "tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
+#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
+#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/sub.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
+#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite
+{
+
+namespace reference_ops
+{
+
+// Element-wise ReLU: writes max(input, 0) for every element.
+// The element count is taken from MatchingFlatSize(input_shape, output_shape).
+template <typename T>
+inline void Relu(const RuntimeShape &input_shape, const T *input_data,
+                 const RuntimeShape &output_shape, T *output_data)
+{
+  const int num_elements = MatchingFlatSize(input_shape, output_shape);
+  const T zero = 0;
+  for (int idx = 0; idx < num_elements; ++idx)
+  {
+    const T value = input_data[idx];
+    if (value < zero)
+    {
+      output_data[idx] = zero;
+    }
+    else
+    {
+      output_data[idx] = value;
+    }
+  }
+}
+
+// Element-wise ReLU1: clamps every element to the closed range [-1, 1].
+// The element count is taken from MatchingFlatSize(input_shape, output_shape).
+template <typename T>
+inline void Relu1(const RuntimeShape &input_shape, const T *input_data,
+                  const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Relu1 (not fused)");
+  const int num_elements = MatchingFlatSize(input_shape, output_shape);
+  const T ceiling = 1;
+  const T floor_value = -1;
+  for (int idx = 0; idx < num_elements; ++idx)
+  {
+    T result = input_data[idx];
+    // The upper bound is tested first, exactly as in a nested ternary.
+    if (result > ceiling)
+    {
+      result = ceiling;
+    }
+    else if (result < floor_value)
+    {
+      result = floor_value;
+    }
+    output_data[idx] = result;
+  }
+}
+
+// Element-wise float ReLU6: clamps every element to the closed range [0, 6].
+// The element count is taken from MatchingFlatSize(input_shape, output_shape).
+inline void Relu6(const RuntimeShape &input_shape, const float *input_data,
+                  const RuntimeShape &output_shape, float *output_data)
+{
+  ruy::profiler::ScopeLabel label("Relu6 (not fused)");
+  const int num_elements = MatchingFlatSize(input_shape, output_shape);
+  const float ceiling = 6;
+  const float floor_value = 0;
+  for (int idx = 0; idx < num_elements; ++idx)
+  {
+    float result = input_data[idx];
+    // The upper bound is tested first, exactly as in a nested ternary.
+    if (result > ceiling)
+    {
+      result = ceiling;
+    }
+    else if (result < floor_value)
+    {
+      result = floor_value;
+    }
+    output_data[idx] = result;
+  }
+}
+
+// Quantized ReLU-X with requantization.
+//
+// For every element the input offset is removed, the value is rescaled with
+// MultiplyByQuantizedMultiplier(params.output_multiplier, params.output_shift), the output
+// offset is added and the result is clamped to
+// [params.quantized_activation_min, params.quantized_activation_max] before being cast to T.
+// The element count is taken from MatchingFlatSize(input_shape, output_shape).
+template <typename T>
+inline void ReluX(const tflite::ReluParams &params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i)
+  {
+    // All arithmetic is done in 32 bits so the intermediate value cannot overflow T.
+    const int32_t val = static_cast<int32_t>(input_data[i]);
+    int32_t clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
+                                                                           params.output_multiplier,
+                                                                           params.output_shift);
+    clamped = std::max(params.quantized_activation_min, clamped);
+    clamped = std::min(params.quantized_activation_max, clamped);
+    output_data[i] = static_cast<T>(clamped);
+  }
+}
+
+// Quantized ReLU-X without requantization: clamps every element to
+// [params.quantized_activation_min, params.quantized_activation_max] (both converted to T).
+// The element count is taken from MatchingFlatSize(input_shape, output_shape).
+template <typename T>
+inline void ReluX(const tflite::ActivationParams &params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  const T max_value = params.quantized_activation_max;
+  const T min_value = params.quantized_activation_min;
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const T val = input_data[i];
+    const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
+    output_data[i] = clamped;
+  }
+}
+
+// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+// Broadcasting uint8 multiplication driven by the five-entry params.broadcast_shape.
+//
+// When the broadcast category is not kFirstInputBroadcastsFast the two inputs (and their
+// offsets) are swapped, so the loop nest below only has to handle one arrangement.
+// The innermost work is delegated to MulElementwise() on runs of broadcast_shape[4] elements.
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params,
+                                 const RuntimeShape &unswitched_input1_shape,
+                                 const uint8 *unswitched_input1_data,
+                                 const RuntimeShape &unswitched_input2_shape,
+                                 const uint8 *unswitched_input2_data,
+                                 const RuntimeShape &output_shape, uint8 *output_data)
+{
+  ArithmeticParams switched_params = unswitched_params;
+  switched_params.input1_offset = unswitched_params.input2_offset;
+  switched_params.input2_offset = unswitched_params.input1_offset;
+
+  const bool use_unswitched = unswitched_params.broadcast_category ==
+                              tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+  const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
+  const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+  const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+  // Fivefold nested loops. The second input resets its position for each
+  // iteration of the second loop. The first input resets its position at the
+  // beginning of the fourth loop. The innermost loop is an elementwise Mul of
+  // sections of the arrays.
+  uint8 *output_data_ptr = output_data;
+  const uint8 *input1_data_ptr = input1_data;
+  const uint8 *input2_data_reset = input2_data;
+  int y0 = params.broadcast_shape[0];
+  int y1 = params.broadcast_shape[1];
+  int y2 = params.broadcast_shape[2];
+  int y3 = params.broadcast_shape[3];
+  int y4 = params.broadcast_shape[4];
+  for (int i0 = 0; i0 < y0; ++i0)
+  {
+    // Start from the reset position so the pointer is well defined even when y1 == 0
+    // (it used to be read uninitialised in that case).
+    const uint8 *input2_data_ptr = input2_data_reset;
+    for (int i1 = 0; i1 < y1; ++i1)
+    {
+      input2_data_ptr = input2_data_reset;
+      for (int i2 = 0; i2 < y2; ++i2)
+      {
+        for (int i3 = 0; i3 < y3; ++i3)
+        {
+          MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+          input2_data_ptr += y4;
+          output_data_ptr += y4;
+        }
+        input1_data_ptr += y4;
+      }
+    }
+    // The next outer iteration continues where the second input stopped.
+    input2_data_reset = input2_data_ptr;
+  }
+}
+
+// Element-wise int16 * int16 -> int16 multiplication.
+// Both inputs and the output are treated as gemmlowp fixed-point values with 0 integer bits
+// (range [-1, 1]); the raw product is stored without any further clamping.
+// The element count is taken from MatchingElementsSize() over the three shapes.
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                const int16 *input1_data, const RuntimeShape &input2_shape,
+                const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
+{
+  (void)params; // not needed by this overload
+  ruy::profiler::ScopeLabel label("Mul/Int16");
+
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  // F0 uses 0 integer bits, range [-1, 1].
+  using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+  for (int i = 0; i < flat_size; i++)
+  {
+    F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+    output_data[i] = unclamped_result.raw();
+  }
+}
+
+// Element-wise int16 * int16 -> uint8 multiplication.
+// The inputs are multiplied as gemmlowp fixed-point values with 0 integer bits, the raw
+// product is divided by 2^8 with rounding, clamped so that adding params.output_offset stays
+// within [quantized_activation_min, quantized_activation_max], and then offset.
+// The element count is taken from MatchingElementsSize() over the three shapes.
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                const int16 *input1_data, const RuntimeShape &input2_shape,
+                const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
+{
+  ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
+  int32 output_offset = params.output_offset;
+  int32 output_activation_min = params.quantized_activation_min;
+  int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  // F0 uses 0 integer bits, range [-1, 1].
+  using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+  for (int i = 0; i < flat_size; i++)
+  {
+    F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+    int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+    // Clamp before adding the offset: the bounds are shifted by -output_offset instead.
+    int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
+    clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
+    output_data[i] = output_offset + clamped_result;
+  }
+}
+
+// Element-wise int16 subtraction (input1 - input2) on gemmlowp fixed-point values with
+// 0 integer bits.
+//
+// At most one of the inputs may carry a (non-positive) shift. That input is divided by
+// 2^(-shift) with rounding before the saturating subtraction; the other is used as is.
+// The result is clamped to [quantized_activation_min, quantized_activation_max].
+// The element count is taken from MatchingElementsSize() over the three shapes.
+inline void Sub16(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+                  const int16_t *input1_data, const RuntimeShape &input2_shape,
+                  const int16_t *input2_data, const RuntimeShape &output_shape,
+                  int16_t *output_data)
+{
+  ruy::profiler::ScopeLabel label("Sub/Int16");
+  const int input1_shift = params.input1_shift;
+  const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+  const int16 output_activation_min = params.quantized_activation_min;
+  const int16 output_activation_max = params.quantized_activation_max;
+
+  TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
+  TFLITE_DCHECK_LE(input1_shift, 0);
+  TFLITE_DCHECK_LE(params.input2_shift, 0);
+  // When input1 has no shift, input2 is the one that gets rescaled, and vice versa.
+  const bool input1_is_unshifted = (input1_shift == 0);
+  const int16 *not_shift_input = input1_is_unshifted ? input1_data : input2_data;
+  const int16 *shift_input = input1_is_unshifted ? input2_data : input1_data;
+  const int input_right_shift = input1_is_unshifted ? -params.input2_shift : -input1_shift;
+
+  // F0 uses 0 integer bits, range [-1, 1].
+  using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+  for (int i = 0; i < flat_size; ++i)
+  {
+    const F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+    const F0 scaled_input =
+      F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+    // Keep the operand order of input1 - input2 regardless of which one was rescaled.
+    const F0 result = input1_is_unshifted ? SaturatingSub(input_ready_scaled, scaled_input)
+                                          : SaturatingSub(scaled_input, input_ready_scaled);
+    const int16 raw_output = result.raw();
+    const int16 clamped_output =
+      std::min(output_activation_max, std::max(output_activation_min, raw_output));
+    output_data[i] = clamped_output;
+  }
+}
+
+// Packs params.inputs_count equally shaped inputs into one output along params.axis.
+//
+// outer_size is the product of the output dimensions before the axis and copy_size the
+// product of those after it; input i contributes copy_size contiguous elements to every one
+// of the outer_size output slices.
+// NOTE(review): params.axis is used as given (no negative-axis normalisation here) -
+// presumably the caller normalises it; confirm.
+template <typename Scalar>
+void Pack(const PackParams &params, const RuntimeShape *const *input_shapes,
+          const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("Pack");
+  const int dimensions = output_shape.DimensionsCount();
+  const int axis = params.axis;
+  const int inputs_count = params.inputs_count;
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; i++)
+  {
+    outer_size *= output_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; i++)
+  {
+    copy_size *= output_shape.Dims(i);
+  }
+  // Only the first input shape is checked against the expected per-input element count.
+  TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+  for (int i = 0; i < inputs_count; ++i)
+  {
+    for (int k = 0; k < outer_size; k++)
+    {
+      const Scalar *input_ptr = input_data[i] + copy_size * k;
+      // Slice k of the output holds inputs_count runs of copy_size elements; input i is run i.
+      int loc = k * inputs_count * copy_size + i * copy_size;
+      memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
+    }
+  }
+}
+
+// Splits the input along params.axis into params.num_split outputs (the inverse of Pack).
+//
+// A negative axis is counted from the last dimension. outer_size is the product of the input
+// dimensions before the axis and copy_size the product of those after it; output i receives
+// copy_size contiguous elements from every one of the outer_size input slices.
+template <typename Scalar>
+void Unpack(const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+            const RuntimeShape &output_shape, Scalar *const *output_datas)
+{
+  ruy::profiler::ScopeLabel label("Unpack");
+  const int dimensions = input_shape.DimensionsCount();
+  const int outputs_count = params.num_split;
+
+  int outer_size = 1;
+  int axis = params.axis;
+  if (axis < 0)
+  {
+    axis += dimensions;
+  }
+  TFLITE_DCHECK_GE(axis, 0);
+  TFLITE_DCHECK_LT(axis, dimensions);
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; ++i)
+  {
+    copy_size *= input_shape.Dims(i);
+  }
+  TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
+
+  for (int i = 0; i < outputs_count; ++i)
+  {
+    for (int k = 0; k < outer_size; k++)
+    {
+      Scalar *output_ptr = output_datas[i] + copy_size * k;
+      // Slice k of the input holds outputs_count runs of copy_size elements; output i is run i.
+      int loc = k * outputs_count * copy_size + i * copy_size;
+      memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+    }
+  }
+}
+
+// Packs uint8 inputs along params.axis, taking per-input quantization into account.
+//
+// Inputs whose zero point and scale equal those of the output are copied verbatim.
+// Any other input hits assert(false): requantization is treated as unsupported. The
+// requantizing code after the assert only runs when asserts are compiled out.
+// NOTE(review): Scalar cannot be deduced from the arguments and the body only compiles
+// when Scalar is uint8 - callers presumably instantiate it explicitly; confirm.
+template <typename Scalar>
+void PackWithScaling(const PackParams &params, const RuntimeShape *const *input_shapes,
+                     const uint8 *const *input_data, const RuntimeShape &output_shape,
+                     uint8 *output_data)
+{
+  ruy::profiler::ScopeLabel label("PackWithScaling");
+  const int dimensions = output_shape.DimensionsCount();
+  int axis = params.axis;
+  const int32 *input_zeropoint = params.input_zeropoint;
+  const float *input_scale = params.input_scale;
+  int inputs_count = params.inputs_count;
+  const int32 output_zeropoint = params.output_zeropoint;
+  const float output_scale = params.output_scale;
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; i++)
+  {
+    outer_size *= output_shape.Dims(i);
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; i++)
+  {
+    copy_size *= output_shape.Dims(i);
+  }
+  // Only the first input shape is checked against the expected per-input element count.
+  TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+  Scalar *output_ptr = output_data;
+  const float inverse_output_scale = 1.f / output_scale;
+  for (int k = 0; k < outer_size; k++)
+  {
+    for (int i = 0; i < inputs_count; ++i)
+    {
+      if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
+      {
+        memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
+      }
+      else
+      {
+        assert(false);
+        const float scale = input_scale[i] * inverse_output_scale;
+        const float bias = -input_zeropoint[i] * scale;
+        // Read slice k of this input, the same slice the memcpy branch reads
+        // (previously slice 0 was read for every k).
+        auto input_ptr = input_data[i] + k * copy_size;
+        for (int j = 0; j < copy_size; ++j)
+        {
+          const int value =
+            static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+          output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+        }
+      }
+      output_ptr += copy_size;
+    }
+  }
+}
+
+// Concatenation with the axis forced to 3: the incoming params are copied, the copy's axis
+// is overwritten, and the work is forwarded to Concatenation().
+template <typename Scalar>
+void DepthConcatenation(const ConcatenationParams &params, const RuntimeShape *const *input_shapes,
+                        const Scalar *const *input_data, const RuntimeShape &output_shape,
+                        Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("DepthConcatenation");
+  auto params_copy = params;
+  params_copy.axis = 3;
+  Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
+}
+
+// Float LSTM cell.
+//
+// Steps performed:
+//  1. `input_data` and `prev_activ_data` are concatenated along axis 3 into `concat_temp_data`.
+//  2. One FullyConnected() over the concatenation (with `weights_data` / `bias_data`, no
+//     activation clamping) fills `activ_temp_data`.
+//  3. Along the last axis of activ_temp the four consecutive slices of output_depth values are
+//     read as: input gate (sigmoid), new input (tanh), forget gate (sigmoid) and output gate
+//     (sigmoid). Then
+//       new_state    = input_gate * new_input + forget_gate * prev_state
+//       output_activ = output_gate * tanh(new_state)
+//     are written to `output_state_data` and `output_activ_data`.
+//
+// All shapes except `weights_shape` are extended to 4-D; `params` is not used by this overload.
+inline void LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+                     const float *input_data, const RuntimeShape &unextended_prev_activ_shape,
+                     const float *prev_activ_data, const RuntimeShape &weights_shape,
+                     const float *weights_data, const RuntimeShape &unextended_bias_shape,
+                     const float *bias_data, const RuntimeShape &unextended_prev_state_shape,
+                     const float *prev_state_data,
+                     const RuntimeShape &unextended_output_state_shape, float *output_state_data,
+                     const RuntimeShape &unextended_output_activ_shape, float *output_activ_data,
+                     const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data,
+                     const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
+{
+  (void)params; // not needed by the float implementation
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+                                  output_state_shape, 0, output_activ_shape, 0);
+  const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+                                 output_state_shape, 1, output_activ_shape, 1);
+  const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+                                output_state_shape, 2, output_activ_shape, 2);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                                       3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+
+  // Concatenate prev_activ and input data together.
+  // Fixed-size arrays are enough for the two operands and avoid a heap allocation per call.
+  const float *concat_input_arrays_data[] = {input_data, prev_activ_data};
+  const RuntimeShape *concat_input_arrays_shapes[] = {&input_shape, &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = 2;
+  Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
+                concat_temp_shape, concat_temp_data);
+
+  // Fully connected
+  tflite::FullyConnectedParams fc_params;
+  fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+  fc_params.float_activation_max = std::numeric_limits<float>::max();
+  FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
+                 bias_shape, bias_data, activ_temp_shape, activ_temp_data);
+
+  // Memory state update (the LSTM "guts")
+  for (int b = 0; b < batches; ++b)
+  {
+    for (int w = 0; w < width; ++w)
+    {
+      for (int h = 0; h < height; ++h)
+      {
+        for (int c = 0; c < output_depth; ++c)
+        {
+          // Gate pre-activations sit in four consecutive depth slices of activ_temp.
+          const float input_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
+          const float new_input =
+            std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
+          const float forget_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
+          const float output_gate =
+            1.f /
+            (1.f +
+             std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
+          const float new_state =
+            input_gate * new_input +
+            forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+          output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+          output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
+            output_gate * std::tanh(new_state);
+        }
+      }
+    }
+  }
+}
+
+// Quantized LSTM cell implementation.
+// The quantization of the input, output arrays is as follows:
+// - The input activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that it is the natural interval for output
+// activations (see next point) and these need to be concatenated together.
+// We could accommodate different ranges by re-scaling, but we empirically
+// found that setting the input activations range to be [-1, 127/128] in the
+// first place, removing the need for re-scaling, greatly improves accuracy.
+// - The output activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that the definition of a LSTM cell makes them
+// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
+// makes for simpler, more accurate fixed-point arithmetic.
+// - The output-at-previous-timestep state array is obviously quantized as
+// the output activations.
+// - The internal LSTM memory (not the output-at-previous-timestep, the other
+// internal state array) is int16-quantized and may use any power-of-two,
+// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
+// StateIntegerBits below, see the below discussion of that template
+// parameter ("The StateIntegerBits template parameter").
+// - The output of the internal fully-connected node is int16-quantized
+// on the interval [-8, 8 * 32767/32768], the rationale for which is
+// explained just below ("Why [-8, 8] for fully-connected output?").
+//
+//
+// === The StateIntegerBits template parameter ===
+//
+// The StateIntegerBits template parameter controls the fixed-point format used
+// to represent the internal memory of the LSTM cell (not the
+// output-at-previous-timestep, the other internal state array). It's currently
+// a template parameter so that the model can control that. The most typical
+// value for StateIntegerBits is 4. Other plausible values are anywhere between
+// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
+// and drop that template parameter. The reason why it can't be a runtime
+// parameter is that this controls the fixed-point format used, i.e. we need to
+// generate actually different code based on it. In particular, we generate code
+// for a fixed-point tanh() implementation for that format, which internally
+// uses a fixed-point exp() implementation, which internally uses a
+// barrel-shifter with a number of steps that depends on StateIntegerBits.
+// Another consequence of that is that a higher value of StateIntegerBits
+// results in a more expensive implementation (more barrel shifter steps
+// needed).
+//
+//
+// === Why [-8, 8] for fully-connected output? ===
+//
+// This array is only fed to Logistic and Tanh functions, for which
+// the quantized implementation will want to use fixed-point arithmetic,
+// requiring a power-of-two representation interval. Thus, we should right
+// away quantize this array to a power-of-two interval; otherwise,
+// implementation will need to rescale that, losing any benefit that a tighter
+// representation interval might otherwise yield, while introducing some
+// numerical error and computational overhead.
+//
+// Now, Logistic and Tanh
+// are nearly constant (nearly equal to their horizontal asymptotes)
+// outside of a small bounded interval around 0:
+//
+// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4
+// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7
+// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14
+//
+// From this, we see that clamping to [-4, 4] would be too inaccurate
+// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
+// while clamping to [-16, 16] would make no difference even in float32.
+// However, for a fixed-point implementation in 16-bit integers, using 5
+// integer bits to represent the [-16, 16] range would leave only 11
+// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
+// representable values. Notice that is higher than the
+// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
+// Using [-8, 8] thus seems like the better compromise overall, enjoying
+// an increment of 2.4e-4 between representable values and a worst-case
+// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
+// [-16, 16].
+//
+// Moreover, all other things being equal, it is nice to choose the narrower
+// representation range, as that makes the implementation of fixed-point
+// math functions a little cheaper (each integer bit requires an additional
+// barrel-shifter step in the implementation of exp(-x)). That is further
+// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
+// sense for 32-bit float or 32-bit fixed-point quantization, but we are
+// aiming for 16-bit fixed-point quantization of these internal nodes here.
+//
+// Reference quantized LSTM cell. Every shape argument is extended to 4-D and its last
+// dimension is treated as depth. The function depth-concatenates `input` and `prev_activ`
+// into `concat_temp`, runs a fully-connected node into `activ_temp` (int16, 3 integer bits),
+// then evaluates the gates in 16-bit fixed point, writing the new int16 state to
+// `output_state` and the uint8 activations to `output_activ`.
+//
+// `params` supplies weights_zero_point, accum_multiplier and accum_shift.
+// `gemmlowp_context` is ignored by this implementation.
+template <int StateIntegerBits>
+inline void
+LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+         const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape,
+         const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape,
+         const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape,
+         const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape,
+         const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape,
+         int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape,
+         uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape,
+         uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape,
+         int16 *activ_temp_data_int16, void *gemmlowp_context)
+{
+  (void)gemmlowp_context; // only used in optimized code.
+  int32 weights_zero_point = params.weights_zero_point;
+  int32 accum_multiplier = params.accum_multiplier;
+  int accum_shift = params.accum_shift;
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  // Gather dimensions information, and perform consistency checks.
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
+                                                 output_state_shape, output_activ_shape);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                                       3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
+  const int fc_output_depth =
+    MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
+  const int fc_accum_depth = total_input_depth;
+  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
+
+  // Depth-concatenate prev_activ and input data together.
+  uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
+  const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = 2;
+  Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
+                concat_temp_shape, concat_temp_data_uint8);
+
+  // Implementation of the fully connected node inside the LSTM cell.
+  // The operands are 8-bit integers, the accumulators are internally 32bit
+  // integers, and the output is 16-bit fixed-point with 3 integer bits so
+  // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
+  // is explained in the function comment above.
+  for (int b = 0; b < fc_batches; ++b)
+  {
+    for (int out_c = 0; out_c < fc_output_depth; ++out_c)
+    {
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32 accum = bias_data_int32[out_c];
+      // Accumulation loop.
+      for (int d = 0; d < fc_accum_depth; ++d)
+      {
+        int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
+        int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
+        accum += input_val * weights_val;
+      }
+      // Down-scale the final int32 accumulator to the scale used by our
+      // (16-bit, using 3 integer bits) fixed-point format. The quantized
+      // multiplier and shift here have been pre-computed offline
+      // (e.g. by toco).
+      accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
+      // Saturate, cast to int16, and store to the temporary activations array.
+      accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
+      activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
+    }
+  }
+
+  // Rest of the LSTM cell: tanh and logistic math functions, and some adds
+  // and muls, all done in 16-bit fixed-point.
+  for (int b = 0; b < outer_size; ++b)
+  {
+    for (int c = 0; c < output_depth; ++c)
+    {
+      // Define the fixed-point data types that we will use here. All use
+      // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
+      // They only differ by the number of integral vs. fractional bits,
+      // determining the range of values that they can represent.
+      //
+      // F0 uses 0 integer bits, range [-1, 1].
+      // This is the return type of math functions such as tanh, logistic,
+      // whose range is in [-1, 1].
+      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+      // F3 uses 3 integer bits, range [-8, 8].
+      // This is the range of the previous fully-connected node's output,
+      // which is our input here.
+      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
+      // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
+      // 2^StateIntegerBits]. It's used to represent the internal state, whose
+      // number of integer bits is currently dictated by the model. See comment
+      // on the StateIntegerBits template parameter above.
+      using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
+      // Implementation of input gate, using fixed-point logistic function.
+      F3 input_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
+      F0 input_gate_output = gemmlowp::logistic(input_gate_input);
+      // Implementation of input modulation gate, using fixed-point tanh
+      // function.
+      F3 input_modulation_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
+      F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
+      // Implementation of forget gate, using fixed-point logistic function.
+      F3 forget_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
+      F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
+      // Implementation of output gate, using fixed-point logistic function.
+      F3 output_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
+      F0 output_gate_output = gemmlowp::logistic(output_gate_input);
+      // Implementation of internal multiplication nodes, still in fixed-point.
+      F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
+      FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
+      FS prev_state_times_forget_state = forget_gate_output * prev_state;
+      // Implementation of internal addition node, saturating.
+      FS new_state =
+        gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
+                                prev_state_times_forget_state);
+      // Implementation of last internal Tanh node, still in fixed-point.
+      // Since a Tanh fixed-point implementation is specialized for a given
+      // number of integer bits, and each specialization can have a substantial
+      // code size, and we already used above a Tanh on an input with 3 integer
+      // bits, and per the table in the above function comment there is no
+      // significant accuracy to be lost by clamping to [-8, +8] for a
+      // 3-integer-bits representation, let us just do that. This helps people
+      // porting this to targets where code footprint must be minimized.
+      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
+      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
+      // Store the new internal state back to memory, as 16-bit integers.
+      // Note: here we store the original value with StateIntegerBits, not
+      // the rescaled 3-integer-bits value fed to tanh.
+      output_state_data_int16[b * output_depth + c] = new_state.raw();
+      // Down-scale the output activations to 8-bit integers, saturating,
+      // and store back to memory.
+      int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
+      int16 clamped_output_activ =
+        std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
+      output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
+    }
+  }
+}
+
+// Splits `input_data` along `params.axis` into `params.num_split` output arrays.
+//
+// A negative axis is offset by the input rank. Every output must have the same rank as the
+// input and identical extents on all dimensions except the split axis, and the outputs'
+// extents along the axis must add up to the input's extent. `output_shapes[i]` and
+// `output_data[i]` describe the i-th output.
+template <typename Scalar>
+void Split(const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+           const RuntimeShape *const *output_shapes, Scalar *const *output_data)
+{
+  ruy::profiler::ScopeLabel label("Split");
+  const int split_dimensions = input_shape.DimensionsCount();
+  const int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+  const int outputs_count = params.num_split;
+  // The normalised axis indexes Dims() below, so it has to be a valid dimension.
+  TFLITE_DCHECK_GE(axis, 0);
+  TFLITE_DCHECK_LT(axis, split_dimensions);
+
+  int64_t split_size = 0;
+  for (int i = 0; i < outputs_count; i++)
+  {
+    TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+    for (int j = 0; j < split_dimensions; j++)
+    {
+      if (j != axis)
+      {
+        MatchingDim(*output_shapes[i], j, input_shape, j);
+      }
+    }
+    split_size += output_shapes[i]->Dims(axis);
+  }
+  TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
+  int64_t outer_size = 1;
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  // For all output arrays,
+  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+  int64_t base_inner_size = 1;
+  for (int i = axis + 1; i < split_dimensions; ++i)
+  {
+    base_inner_size *= input_shape.Dims(i);
+  }
+
+  // Walk the input once; each outer step hands one contiguous chunk to every output in turn.
+  const Scalar *input_ptr = input_data;
+  for (int64_t k = 0; k < outer_size; k++)
+  {
+    for (int i = 0; i < outputs_count; ++i)
+    {
+      // Kept in 64 bits so neither the chunk size nor `k * copy_size` is truncated.
+      const int64_t copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
+      memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+      input_ptr += copy_size;
+    }
+  }
+}
+
+// Flat index of element (b, h, w) in a row-major [batch, height, width] layout.
+inline int NodeOffset(int b, int h, int w, int height, int width)
+{
+  const int row = b * height + h;
+  return row * width + w;
+}
+
+// Local response normalization over the last (depth) dimension.
+//
+// For every depth position c the squared inputs in the window [c - range, c + range]
+// (clipped to the valid depth range) are summed, and the input is scaled by
+// (bias + alpha * sum) ^ -beta. `range`, `bias`, `alpha` and `beta` come from `op_params`.
+// Input and output must agree on every dimension.
+inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+                                       const RuntimeShape &input_shape, const float *input_data,
+                                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+  // Window arithmetic is done in 64 bits so a very large range cannot overflow.
+  const int64_t range = op_params.range;
+
+  for (int i = 0; i < outer_size; ++i)
+  {
+    for (int c = 0; c < depth; ++c)
+    {
+      const int begin_input_c = static_cast<int>(std::max<int64_t>(0, c - range));
+      // The window is inclusive of c + range, hence the exclusive end is c + range + 1.
+      const int end_input_c = static_cast<int>(std::min<int64_t>(depth, c + range + 1));
+      float accum = 0.f;
+      for (int input_c = begin_input_c; input_c < end_input_c; ++input_c)
+      {
+        const float input_val = input_data[i * depth + input_c];
+        accum += input_val * input_val;
+      }
+      const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
+      output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
+    }
+  }
+}
+
+// Converts every half-precision input element to float; input and output shapes must have
+// the same flat size.
+inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data,
+                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int element_count = MatchingFlatSize(input_shape, output_shape);
+  int idx = 0;
+  while (idx < element_count)
+  {
+    output_data[idx] = static_cast<float>(input_data[idx]);
+    ++idx;
+  }
+}
+
+// Fake-quantizes `input_data` into `output_data` using the min/max range and bit width
+// carried by `op_params`.
+inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape,
+                      const float *input_data, const RuntimeShape &output_shape, float *output_data)
+{
+  ruy::profiler::ScopeLabel label("FakeQuant");
+  const float range_min = op_params.minmax.min;
+  const float range_max = op_params.minmax.max;
+  // Zero has to be representable, so the requested range is expected to contain it and to be
+  // non-empty.
+  TFLITE_DCHECK_LE(range_min, 0.0f);
+  TFLITE_DCHECK_GE(range_max, 0.0f);
+  TFLITE_DCHECK_LT(range_min, range_max);
+
+  // Same procedure as tensorflow's FakeQuantWithMinMaxArgsFunctor: nudge the range for the
+  // integer levels [0, 2^num_bits - 1], then quantize the whole array with it.
+  const int num_bits = op_params.num_bits;
+  const int lowest_level = 0;
+  const int highest_level = (1 << num_bits) - 1;
+  float nudged_min, nudged_max, nudged_scale;
+  NudgeQuantizationRange(range_min, range_max, lowest_level, highest_level, &nudged_min,
+                         &nudged_max, &nudged_scale);
+  const int element_count = MatchingFlatSize(input_shape, output_shape);
+  FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, element_count);
+}
+
+// Values shared by `GatherNd` and `GatherNdString`, as computed by `GatherNdHelper`.
+struct GatherNdHelperResult
+{
+ // Number of index tuples: product of every indices dimension except the last.
+ int n_slices;
+ // Number of elements in one gathered slice: product of the params dimensions from
+ // `indices_nd` onwards.
+ int slice_size;
+ // Length of one index tuple: extent of the last indices dimension.
+ int indices_nd;
+ // For each of the first `indices_nd` params dimensions, the flat-offset step of one
+ // increment along that dimension.
+ std::vector<int> dims_to_count;
+};
+
+// Computes the values that `GatherNd` and `GatherNdString` both need.
+//
+// `indices_shape` must have at least one dimension and its last extent (the index tuple
+// length) must not exceed the rank of `params_shape`. The result holds the number of index
+// tuples, the element count of one gathered slice, the tuple length, and the flat-offset
+// step for each indexed params dimension.
+inline GatherNdHelperResult GatherNdHelper(const RuntimeShape &params_shape,
+                                           const RuntimeShape &indices_shape)
+{
+  GatherNdHelperResult ret;
+  ret.n_slices = 1;
+  ret.slice_size = 1;
+  const int indices_dims = indices_shape.DimensionsCount();
+  // Dims(indices_dims - 1) below needs a last dimension to read.
+  TFLITE_DCHECK_GE(indices_dims, 1);
+  ret.indices_nd = indices_shape.Dims(indices_dims - 1);
+  const int params_dims = params_shape.DimensionsCount();
+  // An index tuple cannot address more dimensions than params has.
+  TFLITE_DCHECK_LE(ret.indices_nd, params_dims);
+  for (int i = 0; i < indices_dims - 1; ++i)
+  {
+    ret.n_slices *= indices_shape.Dims(i);
+  }
+  for (int i = ret.indices_nd; i < params_dims; ++i)
+  {
+    ret.slice_size *= params_shape.Dims(i);
+  }
+
+  int remain_flat_size = params_shape.FlatSize();
+  ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
+  for (int i = 0; i < ret.indices_nd; ++i)
+  {
+    const int dim = params_shape.Dims(i);
+    // A zero extent means params is empty; use a zero step rather than dividing by zero.
+    ret.dims_to_count[i] = (dim == 0) ? 0 : remain_flat_size / dim;
+    remain_flat_size = ret.dims_to_count[i];
+  }
+
+  return ret;
+}
+
+// Gathers slices of `params_data` addressed by the index tuples in `indices_data` and copies
+// them back to back into `output_data`.
+//
+// Each index component must lie in [0, params_shape.Dims(j)); this is asserted in debug
+// builds. `output_shape` is not consulted.
+template <typename ParamsT, typename IndicesT = int32>
+inline void GatherNd(const RuntimeShape &params_shape, const ParamsT *params_data,
+                     const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                     const RuntimeShape &output_shape, ParamsT *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNd");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    // Turn the i-th index tuple into a flat offset into params.
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      const IndicesT idx = indices_data[i * res.indices_nd + j];
+      // An out-of-range index would make the memcpy below read outside params.
+      TFLITE_DCHECK(0 <= idx && idx < params_shape.Dims(j));
+      from_pos += idx * res.dims_to_count[j];
+    }
+    std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
+                sizeof(ParamsT) * res.slice_size);
+  }
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+// String-tensor counterpart of `GatherNd`: collects the strings addressed by the index tuples
+// in `indices_data` into a DynamicBuffer and writes that buffer to `output_data`.
+//
+// Each index component must lie in [0, params_shape.Dims(j)); this is asserted in debug
+// builds. `output_shape` is not consulted.
+template <typename IndicesT = int32>
+inline void GatherNdString(const RuntimeShape &params_shape, const TfLiteTensor *params_data,
+                           const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                           const RuntimeShape &output_shape, TfLiteTensor *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNdString");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  DynamicBuffer buffer;
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    // Turn the i-th index tuple into the position of the slice's first string.
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      const IndicesT idx = indices_data[i * res.indices_nd + j];
+      // An out-of-range index would address a string outside params.
+      TFLITE_DCHECK(0 <= idx && idx < params_shape.Dims(j));
+      from_pos += idx * res.dims_to_count[j];
+    }
+    for (int j = 0; j < res.slice_size; ++j)
+    {
+      buffer.AddString(GetString(params_data, from_pos + j));
+    }
+  }
+  buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
+}
+#endif
+
+// Zero-fills the output, then adds every update slice at the position named by its index
+// tuple; slices that land on the same position accumulate.
+template <typename IndicesT, typename UpdatesT>
+inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                      const RuntimeShape &updates_shape, const UpdatesT *updates_data,
+                      const RuntimeShape &output_shape, UpdatesT *output_data)
+{
+  ruy::profiler::ScopeLabel label("ScatterNd");
+
+  const int outer_dims = indices_shape.DimensionsCount() - 1;
+  const int indices_nd = indices_shape.Dims(outer_dims);
+  const int updates_dims = updates_shape.DimensionsCount();
+
+  // Number of index tuples: product of every indices dimension except the last.
+  int n_slices = 1;
+  for (int d = 0; d < outer_dims; ++d)
+  {
+    n_slices *= indices_shape.Dims(d);
+  }
+  // Elements per update slice: product of the updates dimensions from `outer_dims` onwards.
+  int slice_size = 1;
+  for (int d = outer_dims; d < updates_dims; ++d)
+  {
+    slice_size *= updates_shape.Dims(d);
+  }
+
+  // Flat-offset step for each indexed output dimension.
+  const int output_flat_size = output_shape.FlatSize();
+  std::vector<int> dims_to_count(indices_nd, 0);
+  int remaining = output_flat_size;
+  for (int d = 0; d < indices_nd; ++d)
+  {
+    remaining = remaining / output_shape.Dims(d);
+    dims_to_count[d] = remaining;
+  }
+
+  memset(output_data, 0, sizeof(UpdatesT) * output_flat_size);
+  for (int slice = 0; slice < n_slices; ++slice)
+  {
+    int to_pos = 0;
+    for (int d = 0; d < indices_nd; ++d)
+    {
+      IndicesT idx = indices_data[slice * indices_nd + d];
+      TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(d));
+      to_pos += idx * dims_to_count[d];
+    }
+    for (int e = 0; e < slice_size; e++)
+    {
+      output_data[to_pos + e] += updates_data[slice * slice_size + e];
+    }
+  }
+}
+
+// Emits, through `writer`, every element of the requested slice in row-major order.
+//
+// The input shape is extended to 5-D and the begin/size vectors are front-padded to match:
+// a padded dimension starts at 0, and a missing or -1 size runs to the end of that dimension.
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer)
+{
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
+  TFLITE_DCHECK_LE(op_params.begin_count, 5);
+  TFLITE_DCHECK_LE(op_params.size_count, 5);
+  const int begin_count = op_params.begin_count;
+  const int size_count = op_params.size_count;
+
+  std::array<int, 5> first;
+  std::array<int, 5> last;
+  for (int dim = 0; dim < 5; ++dim)
+  {
+    // Position of this dimension inside the unpadded vectors; negative means "padded".
+    const int begin_idx = begin_count - (5 - dim);
+    const int size_idx = size_count - (5 - dim);
+    if (begin_idx < 0)
+    {
+      first[dim] = 0;
+    }
+    else
+    {
+      first[dim] = op_params.begin[begin_idx];
+    }
+    if (size_idx < 0 || op_params.size[size_idx] == -1)
+    {
+      last[dim] = ext_shape.Dims(dim);
+    }
+    else
+    {
+      last[dim] = first[dim] + op_params.size[size_idx];
+    }
+  }
+
+  for (int a = first[0]; a < last[0]; ++a)
+  {
+    for (int b = first[1]; b < last[1]; ++b)
+    {
+      for (int c = first[2]; c < last[2]; ++c)
+      {
+        for (int d = first[3]; d < last[3]; ++d)
+        {
+          for (int e = first[4]; e < last[4]; ++e)
+          {
+            writer->Write(Offset(ext_shape, a, b, c, d, e));
+          }
+        }
+      }
+    }
+  }
+}
+
+// Raw-buffer overload: wraps the input/output pointers in a SequentialTensorWriter and runs
+// the writer-based Slice.
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  SequentialTensorWriter<T> buffer_writer(input_data, output_data);
+  Slice(op_params, input_shape, output_shape, &buffer_writer);
+}
+
+// TfLiteTensor overload: wraps the input/output tensors in a SequentialTensorWriter and runs
+// the writer-based Slice.
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
+{
+  SequentialTensorWriter<T> tensor_writer(input, output);
+  Slice(op_params, input_shape, output_shape, &tensor_writer);
+}
+
+// Element-wise minimum of `input1_data` against a single value, the first element of
+// `input2_data`.
+template <typename T>
+void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
+  const int element_count = MatchingFlatSize(input1_shape, output_shape);
+
+  // Only element 0 of the second input is ever used.
+  auto upper_bound = input2_data[0];
+  for (int idx = 0; idx < element_count; idx++)
+  {
+    if (input1_data[idx] > upper_bound)
+    {
+      output_data[idx] = upper_bound;
+    }
+    else
+    {
+      output_data[idx] = input1_data[idx];
+    }
+  }
+}
+
+// Overload with the usual binary-op argument list (shape + data for both inputs), so that
+// callers such as generated code can invoke it like any other binary op.
+template <typename T>
+inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  // The second input's shape is ignored; forward everything else unchanged.
+  Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// Element-wise maximum of `input1_data` against a single value, the first element of
+// `input2_data`.
+template <typename T>
+void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
+  const int element_count = MatchingFlatSize(input1_shape, output_shape);
+
+  // Only element 0 of the second input is ever used.
+  auto lower_bound = input2_data[0];
+  for (int idx = 0; idx < element_count; idx++)
+  {
+    if (input1_data[idx] < lower_bound)
+    {
+      output_data[idx] = lower_bound;
+    }
+    else
+    {
+      output_data[idx] = input1_data[idx];
+    }
+  }
+}
+
+// Overload with the usual binary-op argument list (shape + data for both inputs), so that
+// callers such as generated code can invoke it like any other binary op.
+template <typename T>
+inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  // The second input's shape is ignored; forward everything else unchanged.
+  Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// ArgMax expressed through the shared ArgMinMax routine, with std::greater as the comparator.
+template <typename T1, typename T2, typename T3>
+void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data,
+            const RuntimeShape &output_shape, T2 *output_data)
+{
+  std::greater<T1> prefer_larger;
+  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, prefer_larger);
+}
+
+// Overload with the usual binary-op argument list (shape + data for both inputs), so that
+// callers such as generated code can invoke it like any other binary op.
+template <typename T1, typename T2, typename T3>
+inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data,
+                   const RuntimeShape &input2_shape, const T3 *input2_data,
+                   const RuntimeShape &output_shape, T2 *output_data)
+{
+  // `input2_shape` is ignored; forward everything else unchanged.
+  ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// Element-wise select: output[i] is x[i] where condition[i] is true, otherwise y[i].
+template <typename D, typename T>
+void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+            const RuntimeShape &input_x_shape, const T *input_x_data,
+            const RuntimeShape &input_y_shape, const T *input_y_data,
+            const RuntimeShape &output_shape, T *output_data)
+{
+  // Scalars and one-element tensors may be mixed freely; their shapes need not match as long
+  // as every operand holds exactly one element.
+  const bool all_single_element =
+    input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
+    input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1;
+  const int64_t flatsize =
+    all_single_element
+      ? 1
+      : MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+
+  for (int64_t idx = 0; idx < flatsize; ++idx)
+  {
+    if (input_condition_data[idx])
+    {
+      output_data[idx] = input_x_data[idx];
+    }
+    else
+    {
+      output_data[idx] = input_y_data[idx];
+    }
+  }
+}
+
+// Select where each condition element picks a whole contiguous block: block i of the output
+// is copied from x if condition[i] is true, otherwise from y.
+template <typename D, typename T>
+void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                   const RuntimeShape &input_x_shape, const T *input_x_data,
+                   const RuntimeShape &input_y_shape, const T *input_y_data,
+                   const RuntimeShape &output_shape, T *output_data)
+{
+  const int64_t outer_size = input_condition_shape.FlatSize();
+  int64_t inner_size;
+  if (input_condition_shape.DimensionsCount() != 0)
+  {
+    // One condition element per slice along dimension 0 of x, y and the output.
+    TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
+    inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
+  }
+  else
+  {
+    // Rank-0 condition: the single block spans the whole tensor.
+    inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
+  }
+
+  for (int64_t block = 0; block < outer_size; block++)
+  {
+    const int64_t offset = block * inner_size;
+    const T *chosen = input_condition_data[block] ? input_x_data : input_y_data;
+    memcpy(output_data + offset, chosen + offset, inner_size * sizeof(T));
+  }
+}
+
+// Broadcasting select for operands of rank <= 4: condition, x and y are broadcast against
+// each other and the output is filled element by element.
+template <typename D, typename T>
+void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                           const RuntimeShape &input_x_shape, const T *input_x_data,
+                           const RuntimeShape &input_y_shape, const T *input_y_data,
+                           const RuntimeShape &output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
+
+  const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+
+  NdArrayDesc<4> desc_condition;
+  NdArrayDesc<4> desc_x;
+  NdArrayDesc<4> desc_y;
+  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
+                                      &desc_condition, &desc_x, &desc_y);
+
+  // Loop variables follow the Tensorflow naming (batch, row, col, channel). The last
+  // dimension varies fastest, so it is the innermost loop for the best cache behavior.
+  for (int batch = 0; batch < extended_output_shape.Dims(0); ++batch)
+  {
+    for (int row = 0; row < extended_output_shape.Dims(1); ++row)
+    {
+      for (int col = 0; col < extended_output_shape.Dims(2); ++col)
+      {
+        for (int ch = 0; ch < extended_output_shape.Dims(3); ++ch)
+        {
+          const int condition_index = SubscriptToIndex(desc_condition, batch, row, col, ch);
+          const int x_index = SubscriptToIndex(desc_x, batch, row, col, ch);
+          const int y_index = SubscriptToIndex(desc_y, batch, row, col, ch);
+          const int out_index = Offset(extended_output_shape, batch, row, col, ch);
+          if (input_condition_data[condition_index])
+          {
+            output_data[out_index] = input_x_data[x_index];
+          }
+          else
+          {
+            output_data[out_index] = input_y_data[y_index];
+          }
+        }
+      }
+    }
+  }
+}
+
+// Writes the row-major coordinates of every true condition element to `output_data`, one
+// group of `rank` values per true element, in flat-index order.
+template <typename D, typename T>
+void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                      T *output_data)
+{
+  const size_t size = input_condition_shape.FlatSize();
+  if (size == 0)
+  {
+    // An empty condition tensor produces no output.
+    return;
+  }
+  const size_t cond_rank = input_condition_shape.DimensionsCount();
+
+  // Flat-index step of one increment along each dimension.
+  std::vector<int> steps(cond_rank, 0);
+  int remaining = size;
+  for (int d = 0; d < cond_rank; ++d)
+  {
+    remaining = remaining / input_condition_shape.Dims(d);
+    steps[d] = remaining;
+  }
+
+  int written = 0;
+  for (int flat = 0; flat < size; ++flat)
+  {
+    if (!input_condition_data[flat])
+    {
+      continue;
+    }
+    // Peel the flat index apart, most significant dimension first.
+    int rest = flat;
+    for (int d = 0; d < cond_rank; ++d)
+    {
+      int coord = rest / steps[d];
+      output_data[written * cond_rank + d] = coord;
+      rest %= steps[d];
+    }
+    written++;
+  }
+}
+
+// Fills the output with `default_value`, then stores the sparse values at the given
+// positions. To keep the implementation simple, every entry of `indices` is a 4-element
+// coordinate into the output shape extended to 4-D.
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values,
+                          T default_value, bool value_is_scalar,
+                          const RuntimeShape &unextended_output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+  const int value_count = indices.size();
+
+  // Start from an output that holds the default value everywhere.
+  const int num_elements = output_shape.FlatSize();
+  for (int pos = 0; pos < num_elements; ++pos)
+  {
+    output_data[pos] = default_value;
+  }
+
+  // The scalar / per-entry decision is made once, outside the loops, so that neither loop
+  // has to test the flag on every iteration.
+  if (!value_is_scalar)
+  {
+    // One value per coordinate.
+    for (int n = 0; n < value_count; ++n)
+    {
+      const std::vector<TI> &coord = indices[n];
+      TFLITE_DCHECK_EQ(coord.size(), 4);
+      const T value = values[n];
+      output_data[Offset(output_shape, coord[0], coord[1], coord[2], coord[3])] = value;
+    }
+    return;
+  }
+
+  // Scalar case: every coordinate receives the first value.
+  for (int n = 0; n < value_count; ++n)
+  {
+    const std::vector<TI> &coord = indices[n];
+    TFLITE_DCHECK_EQ(coord.size(), 4);
+    const T value = *values;
+    output_data[Offset(output_shape, coord[0], coord[1], coord[2], coord[3])] = value;
+  }
+}
+
+// Element-wise power: output[i] = input1[i] raised to input2[i]. All three shapes must have
+// the same flat size.
+template <typename T>
+inline void Pow(const RuntimeShape &input1_shape, const T *input1_data,
+                const RuntimeShape &input2_shape, const T *input2_data,
+                const RuntimeShape &output_shape, T *output_data)
+{
+  const int element_count = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  int idx = 0;
+  while (idx < element_count)
+  {
+    output_data[idx] = std::pow(input1_data[idx], input2_data[idx]);
+    ++idx;
+  }
+}
+
+// Broadcasting element-wise power for operands of rank <= 4.
+template <typename T>
+inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data,
+                               const RuntimeShape &unextended_input2_shape, const T *input2_data,
+                               const RuntimeShape &unextended_output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  // Broadcast-aware descriptors translate an output coordinate into each input's index.
+  NdArrayDesc<4> base_desc;
+  NdArrayDesc<4> exponent_desc;
+  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &base_desc,
+                                      &exponent_desc);
+
+  for (int d0 = 0; d0 < output_shape.Dims(0); ++d0)
+  {
+    for (int d1 = 0; d1 < output_shape.Dims(1); ++d1)
+    {
+      for (int d2 = 0; d2 < output_shape.Dims(2); ++d2)
+      {
+        for (int d3 = 0; d3 < output_shape.Dims(3); ++d3)
+        {
+          const auto out_idx = Offset(output_shape, d0, d1, d2, d3);
+          const auto base = input1_data[SubscriptToIndex(base_desc, d0, d1, d2, d3)];
+          const auto exponent = input2_data[SubscriptToIndex(exponent_desc, d0, d1, d2, d3)];
+          output_data[out_idx] = std::pow(base, exponent);
+        }
+      }
+    }
+  }
+}
+
+// Reverses the order of the slices along `axis`. Each slice (everything after `axis`) is
+// copied as one contiguous chunk to its mirrored position.
+template <typename Scalar>
+void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data,
+             const RuntimeShape &output_shape, Scalar *output_data)
+{
+  ruy::profiler::ScopeLabel label("Reverse");
+
+  // Product of the dimensions before the axis.
+  int outer_size = 1;
+  for (int d = 0; d < axis; ++d)
+  {
+    outer_size *= input_shape.Dims(d);
+  }
+
+  // Product of the dimensions after the axis: the size of one contiguous chunk.
+  int copy_size = 1;
+  for (int d = axis + 1; d < input_shape.DimensionsCount(); ++d)
+  {
+    copy_size *= input_shape.Dims(d);
+  }
+
+  const int dims_at_axis = input_shape.Dims(axis);
+  for (int outer = 0; outer < outer_size; ++outer)
+  {
+    const int first_slice = outer * dims_at_axis;
+    for (int pos = 0; pos < dims_at_axis; ++pos)
+    {
+      const int dst_offset = (first_slice + pos) * copy_size;
+      const int src_offset = (first_slice + dims_at_axis - pos - 1) * copy_size;
+      memcpy(output_data + dst_offset, input_data + src_offset, copy_size * sizeof(Scalar));
+    }
+  }
+}
+
+// Reverses, for each batch entry, the first seq_lengths[batch] positions along `seq_dim`;
+// positions at or beyond that length are copied through unchanged.
+//
+// The smaller of batch_dim / seq_dim is treated as the "outer" dimension and the larger as
+// the "medium" dimension. Each branch below handles one of the two possible orders. When
+// batch_dim == seq_dim neither branch runs and nothing is written.
+template <typename Scalar, typename TS>
+void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim,
+ const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("ReverseSequence");
+
+ // outer_size: product of the dimensions before the outer dimension.
+ int outer_size = 1;
+ int outer_dim = std::min(batch_dim, seq_dim);
+ int medium_dim = std::max(batch_dim, seq_dim);
+ for (int i = 0; i < outer_dim; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ // medium_size: product of the dimensions strictly between the outer and medium dimensions.
+ int medium_size = 1;
+ for (int i = outer_dim + 1; i < medium_dim; ++i)
+ {
+ medium_size *= input_shape.Dims(i);
+ }
+
+ // copy_size: product of the dimensions after the medium dimension, i.e. the number of
+ // contiguous elements moved by each memcpy.
+ int copy_size = 1;
+ for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_outer_dim = input_shape.Dims(outer_dim);
+ const int dims_at_medium_dim = input_shape.Dims(medium_dim);
+
+ Scalar *output_ptr;
+ if (batch_dim > seq_dim)
+ {
+ // Sequence is the outer dimension (index j), batch is the medium dimension (index q).
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ // sl is the last sequence position that takes part in the reversal for batch q.
+ int sl = seq_lengths[q] - 1;
+ if (j > sl)
+ {
+ // Beyond the reversed prefix: keep the element where it is.
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ // Inside the prefix: position j moves to the mirrored position sl - j.
+ const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+ else if (batch_dim < seq_dim)
+ {
+ // Batch is the outer dimension (index j), sequence is the medium dimension (index q).
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ // sl is the last sequence position that takes part in the reversal for batch j.
+ int sl = seq_lengths[j] - 1;
+ const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ if (q > sl)
+ {
+ // Beyond the reversed prefix: keep the element where it is.
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ // Inside the prefix: position q moves to the mirrored position sl - q.
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+}
+
+// Sums rows of `input_data` (split along dimension 0) into the output row
+// selected by the matching entry of `segment_ids_data`. The output is zeroed
+// first, so rows that no segment id refers to stay zero.
+// `segment_ids_shape` is not consulted by this implementation.
+template <typename T>
+inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data,
+                       const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data,
+                       const RuntimeShape &output_shape, T *output_data)
+{
+  // Elements per row, i.e. the flat size of every dimension except dimension 0.
+  const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
+
+  memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
+
+  for (int row = 0; row < input_shape.Dims(0); row++)
+  {
+    const int target_row = segment_ids_data[row];
+    T *dst = output_data + target_row * segment_flat_size;
+    const T *src = input_data + row * segment_flat_size;
+    for (int k = 0; k < segment_flat_size; ++k)
+      dst[k] += src[k];
+  }
+}
+
+} // namespace reference_ops
+} // namespace tflite
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
--- /dev/null
+# Locates the third-party sources the MCU PAL builds against and sets
+# PAL_INITIALIZED to TRUE when all of them were found.
+#
+# This is a macro, not a function: each return() below therefore leaves the
+# *including* scope, and PAL_INITIALIZED stays unset on that path.
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.8.0 REQUIRED)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 REQUIRED)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.8.0 REQUIRED)
+ nnas_find_package(TensorFlowRuySource EXACT 2.8.0 REQUIRED)
+
+ # NOTE(review): the packages are requested with REQUIRED; whether the
+ # *_FOUND guards below can still trigger depends on nnas_find_package - confirm.
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+ # Threads lookup is disabled for this PAL.
+ #find_package(Threads REQUIRED)
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+# Adds the PAL include directories to target TGT and links it against the
+# static helper library luci_interpreter_mcu_pal, which is built from a few
+# TensorFlow Lite kernel utility sources.
+#
+# TGT - name of an existing target that uses the PAL headers.
+macro(add_pal_to_target TGT)
+  target_include_directories(${TGT} PRIVATE "${PAL}")
+  target_include_directories(${TGT} PRIVATE
+    "${TensorFlowRuySource_DIR}"
+    "${TensorFlowGEMMLowpSource_DIR}"
+    "${TensorFlowEigenSource_DIR}"
+    "${TensorFlowSource_DIR}")
+  target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+  # TODO put it back, I changed my mind.
+  # instead add sources with visitors in this library
+  set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+    ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+    ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+
+  # A macro body is re-evaluated on every call, so an unconditional
+  # add_library() would fail with a duplicate-target error as soon as the PAL
+  # is added to a second target. Create the helper library only once.
+  if (NOT TARGET luci_interpreter_mcu_pal)
+    add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
+    set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+    target_include_directories(luci_interpreter_mcu_pal PRIVATE
+      "${TensorFlowRuySource_DIR}"
+      "${TensorFlowGEMMLowpSource_DIR}"
+      "${TensorFlowEigenSource_DIR}"
+      "${TensorFlowSource_DIR}"
+    )
+  endif ()
+
+  target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal)
+  #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal)
+endmacro()
--- /dev/null
+# Top-level build script of the micro interpreter: initialises the PAL, builds
+# the component libraries and assembles the public interpreter library.
+include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake")
+
+initialize_pal()
+
+if (NOT PAL_INITIALIZED)
+  message("PAL Failed to initialize, skip luci-interpreter")
+  return()
+endif()
+
+message(STATUS "LUCI INTERPRETER BEGIN")
+
+# Target names; LUCI_INTERPRETER_SUFFIX lets several variants coexist.
+set(LUCI_INTERPRETER_BINARY "luci_interpreter_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_MEMORY_MANAGER "luci_interpreter_micro_memory_manager${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_CORE "luci_interpreter_core_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader_micro${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import_micro${LUCI_INTERPRETER_SUFFIX}")
+
+add_subdirectory(memory_managers)
+message(STATUS "LUCI INTERPRETER MEMORY MANAGER")
+add_subdirectory(core)
+message(STATUS "LUCI INTERPRETER CORE")
+add_subdirectory(kernels)
+message(STATUS "LUCI INTERPRETER KERNELS")
+add_subdirectory(loader)
+message(STATUS "LUCI INTERPRETER LOADER")
+
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC ${LUCI_INTERPRETER_KERNELS})
+# NOTE(review): the argument below is a target name, not a directory; it is
+# kept as-is here, but the link above already propagates the kernels' PUBLIC
+# include directories - confirm this line can be dropped.
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC ${LUCI_INTERPRETER_KERNELS})
+
+message(STATUS "LUCI INTERPRETER INITIALIZED")
+
+set(SOURCES
+  "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h" Interpreter.cpp)
+
+add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
+
+target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_BINARY}
+  PUBLIC ${LUCI_INTERPRETER_MEMORY_MANAGER} ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE})
+
+install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+  FILES_MATCHING PATTERN "*.h")
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/Interpreter.h"
+
+#include "loader/ModuleLoader.h"
+
+namespace luci_interpreter
+{
+
+#ifdef USE_STATIC_ALLOC
+// Construct static interpreter with configurations.
+// Only compiled when USE_STATIC_ALLOC is defined.
+//
+// NOTE(review): this body does not look compilable as written:
+//  - the first `_memory_manager = StaticMemoryManager(...)` statement has no terminating ';'
+//  - `loader` is declared twice (`ModuleLoader loader();` and `ModuleLoader loader(...)`)
+//  - `_runtime_module` / `_memory_manager` are used like smart pointers here, while the
+//    rest of this file accesses `_runtime_module` with '.'
+// It appears to mix two revisions of the loading code - confirm which one is intended.
+Interpreter::Interpreter(const char *model_data_raw, const InterpreterConfigure &configuration)
+{
+ _runtime_module = std::make_unique<RuntimeModule>();
+
+ _memory_manager = StaticMemoryManager(configuration._input_buf_size, configuration._temp_buf_size,
+ configuration._output_buf_size)
+
+ // Note:
+ // configuration._input_buf_size, configuration._temp_buf_size, configuration._output_buf_size
+ // will be removed and will be read from circle file
+ if (configuration.isStaticManager())
+ {
+ _memory_manager = std::make_unique<StaticMemoryManager>(
+ configuration._input_buf_size, configuration._temp_buf_size, configuration._output_buf_size);
+ }
+ else { _memory_manager = std::make_unique<SimpleMemoryManager>(); }
+
+ _memory_manager->is_allocate_input(configuration.getAllocateInputValue());
+
+ // First loading variant: static ModuleLoader::load call.
+ ModuleLoader loader();
+ ModuleLoader::load(_runtime_module.get(), _memory_manager.get(),
+ /* is_static_allocations */ configuration.isStaticManager(), model_data_raw);
+
+ // Second loading variant: ModuleLoader instance.
+ ModuleLoader loader(_runtime_module.get(), _memory_manager.get());
+ loader.load(configuration.isStaticManager(), model_data_raw);
+}
+#else
+
+// Construct default interpreter with dynamic allocations and with input allocations.
+// model_data_raw: raw model data, handed unchanged to ModuleLoader::load together with
+// this interpreter's runtime module and memory manager.
+Interpreter::Interpreter(const char *model_data_raw)
+{
+ ModuleLoader::load(&_runtime_module, &_memory_manager, model_data_raw);
+}
+
+#endif // USE_STATIC_ALLOC
+
+// Defaulted here, in the translation unit that includes the loader and runtime headers.
+Interpreter::~Interpreter() = default;
+
+// Executes the loaded model once by running the runtime module.
+void Interpreter::interpret()
+{
+  _runtime_module.execute();
+}
+
+// Returns the data size of the graph input with the given index, as reported by the main graph.
+int32_t Interpreter::getInputDataSizeByIndex(int32_t input_tensor_index)
+{
+  return _runtime_module.getMainGraph()->getInputDataSizeByIndex(input_tensor_index);
+}
+
+// Returns the data size of the graph output with the given index, as reported by the main graph.
+int32_t Interpreter::getOutputDataSizeByIndex(int32_t output_tensor_index)
+{
+  return _runtime_module.getMainGraph()->getOutputDataSizeByIndex(output_tensor_index);
+}
+
+// Allocates the buffer of the given graph input and copies `data_size` bytes from `data` into it.
+// NOTE(review): `data_size` is not compared with the tensor's own size before the copy;
+// callers presumably pass getInputDataSizeByIndex() - confirm.
+void Interpreter::allocateAndWriteInputTensor(int32_t input_tensor_index, const void *data,
+                                              size_t data_size)
+{
+  assert(data_size > 0);
+  assert(data != nullptr);
+  assert(input_tensor_index >= 0);
+
+  // configureGraphInput hands back the freshly allocated input buffer.
+  auto destination = _runtime_module.getMainGraph()->configureGraphInput(input_tensor_index);
+  std::memcpy(destination, data, data_size);
+}
+
+// Allocates the buffer of the given graph input and returns it so the caller can fill it.
+uint8_t *Interpreter::allocateInputTensor(int32_t input_tensor_index)
+{
+  assert(input_tensor_index >= 0);
+  return _runtime_module.getMainGraph()->configureGraphInput(input_tensor_index);
+}
+
+// Returns the data pointer the main graph holds for the given graph output.
+uint8_t *Interpreter::readOutputTensor(int32_t output_tensor_index)
+{
+  return _runtime_module.getMainGraph()->getOutputDataByIndex(output_tensor_index);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+# Core library: tensor/data-type headers plus the runtime graph and module.
+set(SOURCES
+  "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/DataType.h"
+  "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/Tensor.h"
+  RuntimeGraph.h
+  RuntimeGraph.cpp
+  RuntimeModule.h)
+
+add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
+
+# Position-independent code unless the build explicitly opts out.
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif()
+
+# The circle reader lives in its own subdirectory and is linked below.
+add_subdirectory(reader)
+
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC
+  "luci_micro_circle_reader${READER_SUFFIX}"
+  luci_micro_circle_schema
+  ${LUCI_INTERPRETER_MEMORY_MANAGER})
+
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC
+  "${LUCI_INTERPRETER_INCLUDE_DIR}"
+  "${LUCI_INTERPRETER_SOURCE_DIR}")
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeGraph.h"
+#include "kernels/KernelBuilder.h"
+
+#include <algorithm>
+#include <map>
+
+namespace luci_interpreter
+{
+
+// IBaseRuntimeGraph
+// Creates an empty runtime graph that allocates through `memory_manager` and reads
+// the model through `circle_reader`. Both pointers are stored, not owned.
+// Members are listed in declaration order (which is the order they are actually
+// initialised in) and the containers are value-initialised directly instead of
+// being built from temporaries.
+RuntimeGraph::RuntimeGraph(SimpleMemoryManager *memory_manager, CircleReader *circle_reader)
+  : _memory_manager(memory_manager), _reader(circle_reader), _tensor_to_data(),
+    _inplace_op_indexes()
+{
+}
+
+// Hands every buffer still registered in _tensor_to_data back to the memory manager.
+RuntimeGraph::~RuntimeGraph()
+{
+  for (auto &entry : _tensor_to_data)
+  {
+    auto *buffer = entry.second;
+    _memory_manager->release_memory(buffer);
+  }
+}
+
+// TODO: modify this
+// Builds _alloc_plan and _dealloc_plan from tensor lifetimes.
+// A Lifetime is (index of the operator before which the tensor is allocated,
+// index of the operator after which it is released); -1 in either slot means
+// "no planned action". The graph is marked valid at the end.
+void RuntimeGraph::buildAllocDeallocPlan()
+{
+ invalidate();
+ using Lifetime = std::pair<int32_t, int32_t>;
+ std::map<const circle::Tensor *, Lifetime> lifetimes;
+ const size_t num_kernels = _reader->operators().size();
+
+ // Graph inputs are never allocated by the plan (-1); their release index
+ // starts at 0 and is moved forward to the last consuming operator below.
+ for (const auto input_ind : _reader->inputs())
+ {
+ const auto raw_tensor = _reader->tensors()[input_ind];
+
+ assert(lifetimes.count(raw_tensor) == 0);
+ lifetimes[raw_tensor] = Lifetime(-1, 0);
+ }
+
+ for (int32_t index = 0; index < num_kernels; ++index)
+ {
+ const auto kernel = _reader->operators().at(index);
+ assert(kernel != nullptr);
+
+ for (int32_t j = 0; j < kernel->inputs()->size(); ++j)
+ {
+ const auto input_index = kernel->inputs()->operator[](j);
+
+ // -1 marks an input slot without a tensor.
+ if (input_index == -1)
+ continue;
+
+ const auto raw_tensor = _reader->tensors()[input_index];
+
+ // Pass constant tensors
+ auto const &buffer = wrap(_reader->buffers()[raw_tensor->buffer()]->data());
+ if (not buffer.empty())
+ {
+ // tensor has data in the model buffer, so it is not part of the plan
+ continue;
+ }
+
+ // Extend the lifetime to this consumer; an in-place operator takes over
+ // the buffer, so its input gets no planned release.
+ if (lifetimes.count(raw_tensor) > 0)
+ {
+ if (_inplace_op_indexes.find(index) != _inplace_op_indexes.end())
+ lifetimes.at(raw_tensor).second = -1;
+ else
+ lifetimes.at(raw_tensor).second = index;
+ }
+ }
+
+ for (int32_t j = 0; j < kernel->outputs()->size(); ++j)
+ {
+ const auto output_index = kernel->outputs()->operator[](j);
+ const auto raw_tensor = _reader->tensors()[output_index];
+
+ // Each tensor is expected to be produced by exactly one operator.
+ assert(lifetimes.count(raw_tensor) == 0);
+ // In-place outputs get no planned allocation; other outputs are allocated
+ // before the producer and, until a consumer is seen, released right after it.
+ if (_inplace_op_indexes.find(index) != _inplace_op_indexes.end())
+ lifetimes[raw_tensor] = Lifetime(-1, index);
+ else
+ lifetimes[raw_tensor] = Lifetime(index, index);
+ }
+ }
+
+ // Graph outputs are released at slot num_kernels, the extra last slot of
+ // _dealloc_plan; execute() only calls deallocate() for real operator indexes.
+ for (const auto output_ind : _reader->outputs())
+ {
+ const auto raw_tensor = _reader->tensors()[output_ind];
+
+ if (lifetimes.count(raw_tensor) > 0)
+ lifetimes.at(raw_tensor).second = num_kernels;
+ }
+
+ // Turn the lifetimes into per-operator lists.
+ _alloc_plan.assign(num_kernels, std::vector<const circle::Tensor *>());
+ _dealloc_plan.assign(num_kernels + 1, std::vector<const circle::Tensor *>());
+ for (const auto &item : lifetimes)
+ {
+ if (item.second.first != -1)
+ _alloc_plan[item.second.first].push_back(item.first);
+ if (item.second.second != -1)
+ _dealloc_plan[item.second.second].push_back(item.first);
+ }
+ _is_valid = true;
+}
+
+// Allocates every tensor planned for operator `kernel_index`. A buffer that is
+// already registered for such a tensor is released first and then replaced.
+void RuntimeGraph::allocate(size_t kernel_index)
+{
+  assert(_is_valid && kernel_index < _alloc_plan.size());
+
+  const auto &planned = _alloc_plan[kernel_index];
+  for (const circle::Tensor *tensor : planned)
+  {
+    const auto existing = _tensor_to_data.find(tensor);
+    if (existing != _tensor_to_data.end())
+    {
+      auto *stale = existing->second;
+      _memory_manager->release_memory(stale);
+    }
+
+    auto *fresh = _memory_manager->allocate_memory(tensor);
+    _tensor_to_data[tensor] = fresh;
+  }
+}
+
+// Releases every tensor planned for release after operator `kernel_index` and
+// removes it from the tensor-to-data map. Each such tensor is expected to be registered.
+void RuntimeGraph::deallocate(size_t kernel_index)
+{
+  assert(_is_valid && kernel_index < _dealloc_plan.size());
+
+  const auto &planned = _dealloc_plan[kernel_index];
+  for (const circle::Tensor *tensor : planned)
+  {
+    const auto entry = _tensor_to_data.find(tensor);
+    assert(entry != _tensor_to_data.end());
+
+    auto *buffer = _tensor_to_data.at(tensor);
+    _memory_manager->release_memory(buffer);
+    _tensor_to_data.erase(entry);
+  }
+}
+
+// Releases and unregisters the buffers of all graph outputs that currently have one.
+void RuntimeGraph::resetOutputTensorsData()
+{
+  for (const auto tensor_index : _reader->outputs())
+  {
+    assert(tensor_index != -1);
+    const auto tensor = _reader->tensors()[tensor_index];
+    assert(tensor != nullptr);
+
+    const auto entry = _tensor_to_data.find(tensor);
+    if (entry == _tensor_to_data.end())
+      continue; // nothing registered for this output
+
+    auto *buffer = entry->second;
+    _memory_manager->release_memory(buffer);
+    _tensor_to_data.erase(entry);
+  }
+}
+
+// Drops the output buffers of a previous run, then (re)allocates the buffer of
+// graph input `input_index` and returns it for the caller to fill.
+uint8_t *RuntimeGraph::configureGraphInput(int32_t input_index)
+{
+  resetOutputTensorsData();
+
+  const auto tensor_index = _reader->inputs()[input_index];
+  assert(tensor_index != -1);
+  const auto tensor = _reader->tensors()[tensor_index];
+  assert(tensor != nullptr);
+
+  // Release a buffer left over from an earlier configuration of this input.
+  const auto previous = _tensor_to_data.find(tensor);
+  if (previous != _tensor_to_data.end())
+  {
+    auto *stale = previous->second;
+    _memory_manager->release_memory(stale);
+  }
+
+  auto *fresh = _memory_manager->allocate_memory(tensor);
+  _tensor_to_data[tensor] = fresh;
+  return fresh;
+}
+
+// To save data
+// TODO maybe remove it
+// Registers the caller-provided `data` as the buffer of graph input `input_index`
+// instead of allocating one, after dropping the output buffers of a previous run.
+// NOTE(review): the stored pointer is later passed to release_memory() like any other
+// registered buffer; confirm the memory manager may release caller-provided memory.
+void RuntimeGraph::configureGraphInput(int32_t input_index, uint8_t *data)
+{
+  resetOutputTensorsData();
+
+  const auto tensor_index = _reader->inputs()[input_index];
+  assert(tensor_index != -1);
+  const auto tensor = _reader->tensors()[tensor_index];
+  assert(tensor != nullptr);
+
+  // Release a buffer left over from an earlier configuration of this input.
+  const auto previous = _tensor_to_data.find(tensor);
+  if (previous != _tensor_to_data.end())
+  {
+    auto *stale = previous->second;
+    _memory_manager->release_memory(stale);
+  }
+
+  _tensor_to_data[tensor] = data;
+}
+
+// Returns element count times element size for graph input `input_index`.
+int32_t RuntimeGraph::getInputDataSizeByIndex(int32_t input_index)
+{
+  const auto tensor_index = _reader->inputs()[input_index];
+  assert(tensor_index != -1);
+  const auto tensor = _reader->tensors()[tensor_index];
+  assert(tensor != nullptr);
+
+  const auto element_count = Tensor::num_elements(tensor);
+  return element_count * size(Tensor::element_type(tensor));
+}
+
+// Returns element count times element size for graph output `output_index`.
+int32_t RuntimeGraph::getOutputDataSizeByIndex(int32_t output_index)
+{
+  const auto tensor_index = _reader->outputs()[output_index];
+  assert(tensor_index != -1);
+  const auto tensor = _reader->tensors()[tensor_index];
+  assert(tensor != nullptr);
+
+  const auto element_count = Tensor::num_elements(tensor);
+  return element_count * size(Tensor::element_type(tensor));
+}
+
+// Returns the buffer registered for graph output `output_index`, or nullptr when
+// none is registered (in debug builds the missing-buffer case asserts).
+// The lookup uses find() rather than operator[], so a query for an output without
+// a buffer no longer inserts a null entry into _tensor_to_data when asserts are
+// compiled out; the returned value is unchanged.
+uint8_t *RuntimeGraph::getOutputDataByIndex(int32_t output_index)
+{
+  const auto tensor_index = _reader->outputs()[output_index];
+  assert(tensor_index != -1);
+  const auto tensor = _reader->tensors()[tensor_index];
+  assert(tensor != nullptr);
+
+  const auto entry = _tensor_to_data.find(tensor);
+  assert(entry != _tensor_to_data.end());
+  if (entry == _tensor_to_data.end())
+    return nullptr;
+
+  return entry->second;
+}
+
+// Returns the buffer registered for `raw_tensor`, or nullptr when the tensor is
+// null or has no registered buffer.
+uint8_t *RuntimeGraph::getDataByTensor(const circle::Tensor *raw_tensor)
+{
+  if (raw_tensor == nullptr)
+    return nullptr;
+
+  const auto entry = _tensor_to_data.find(raw_tensor);
+  if (entry != _tensor_to_data.end())
+    return entry->second;
+
+  return nullptr;
+}
+
+// Transfers the buffer registered for `src_tensor` to `dst_tensor`: the source
+// entry is removed and the same pointer is registered under the destination.
+// Does nothing when either tensor is null. The source must have a buffer and the
+// destination must not have one yet (both checked by assert).
+void RuntimeGraph::makeInplaceOperation(const circle::Tensor *src_tensor,
+                                        const circle::Tensor *dst_tensor)
+{
+  if (src_tensor == nullptr or dst_tensor == nullptr)
+    return;
+
+  const auto source_entry = _tensor_to_data.find(src_tensor);
+  assert(source_entry != _tensor_to_data.end() && "Failed makeInplaceOperation");
+
+  // Remember the pointer before the source entry disappears.
+  auto *shared_buffer = source_entry->second;
+  _tensor_to_data.erase(source_entry);
+
+  assert(_tensor_to_data.find(dst_tensor) == _tensor_to_data.end() &&
+         "Failed makeInplaceOperation");
+  _tensor_to_data[dst_tensor] = shared_buffer;
+}
+
+// Returns a pointer to the data of the model buffer that `raw_tensor` refers to,
+// with constness cast away, or nullptr when the tensor is null.
+uint8_t *RuntimeGraph::getConstDataByTensor(const circle::Tensor *raw_tensor)
+{
+  if (raw_tensor == nullptr)
+    return nullptr;
+
+  const auto buffer_index = raw_tensor->buffer();
+  auto const &buffer = wrap(_reader->buffers()[buffer_index]->data());
+  const uint8_t *bytes = buffer.data();
+
+  return const_cast<uint8_t *>(bytes);
+}
+
+// Returns the model tensor at `index`, or nullptr for a negative index.
+const circle::Tensor *RuntimeGraph::getCircleTensorByIndex(int32_t index)
+{
+  if (index < 0)
+    return nullptr;
+
+  return _reader->tensors()[index];
+}
+
+// Runs the configure step of every operator in model order, builds the
+// allocation plan if the graph is not valid yet, and marks the graph valid.
+void RuntimeGraph::configure()
+{
+  KernelConfigureRegistry kernel_configure;
+
+  for (uint32_t op_idx = 0; op_idx < _reader->operators().size(); ++op_idx)
+  {
+    const auto op = _reader->operators().at(op_idx);
+    assert(op != nullptr);
+
+    kernel_configure.configure_kernel(op, _reader->builtin_code(op), this);
+  }
+
+  // The plan is built after all kernels were configured.
+  if (not _is_valid)
+  {
+    buildAllocDeallocPlan();
+  }
+
+  _is_valid = true;
+}
+
+// Executes all operators in model order. The graph is configured first when it
+// is not valid. For each operator its planned tensors are allocated, the kernel
+// is run (with a flag telling it whether the operator is registered as in-place),
+// and the tensors planned for release after it are freed.
+void RuntimeGraph::execute()
+{
+  if (not _is_valid)
+  {
+    configure();
+  }
+
+  KernelExecuteRegistry kernel_executor;
+
+  for (uint32_t op_idx = 0; op_idx < _reader->operators().size(); ++op_idx)
+  {
+    const auto op = _reader->operators().at(op_idx);
+    assert(op != nullptr);
+    const auto opcode = _reader->builtin_code(op);
+
+    allocate(op_idx);
+
+    bool is_inplace = _inplace_op_indexes.count(op_idx) != 0;
+    kernel_executor.execute_kernel(op, opcode, this, is_inplace);
+
+    deallocate(op_idx);
+  }
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+
+#include "luci_interpreter/core/Tensor.h"
+#ifdef USE_STATIC_ALLOC
+#include "memory_managers/StaticMemoryManager.h"
+#else
+#include "memory_managers/SimpleMemoryManager.h"
+#endif // USE_STATIC_ALLOC
+
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <memory>
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule;
+
+#ifdef USE_STATIC_ALLOC
+// TODO: Enable it
+#if 0
+// Placeholder for the statically allocated graph variant. It sits inside `#if 0`,
+// so it is not compiled; the IBaseRuntimeGraph base it names is not declared in this header.
+class StaticRuntimeGraph final : public IBaseRuntimeGraph
+{
+public:
+ explicit StaticRuntimeGraph(IMemoryManager *memory_manager, CircleReader *circle_reader);
+ ~StaticRuntimeGraph() final;
+
+ void configureGraphInputs() final;
+ void execute() final;
+ void configure() final;
+
+ void configure_kernels() final;
+};
+#endif
+#else
+
+// Runtime view of one graph of the model: maps model tensors to data buffers and
+// runs the operators with a per-operator allocation / release plan.
+//
+// NOTE(review): the class has a user-declared destructor that releases buffers but no
+// user-declared copy/move operations, so copies share the same buffer pointers - confirm
+// instances are never copied while they hold buffers.
+class RuntimeGraph
+{
+public:
+ // Both pointers are stored, not owned.
+ explicit RuntimeGraph(SimpleMemoryManager *memory_manager, CircleReader *circle_reader);
+ // Releases every buffer that is still registered.
+ ~RuntimeGraph();
+
+ // NOTE(review): confirm a definition exists for this declaration.
+ Tensor *addTensor(const circle::Tensor *raw_tensor, std::unique_ptr<Tensor> &&tensor);
+
+ // Model tensor at `index`, nullptr for a negative index.
+ const circle::Tensor *getCircleTensorByIndex(int32_t index);
+
+ // Moves the buffer registered for src_tensor to dst_tensor.
+ void makeInplaceOperation(const circle::Tensor *src_tensor, const circle::Tensor *dst_tensor);
+
+ // Registered runtime buffer of a tensor (nullptr if none) / data stored in the model buffer.
+ uint8_t *getDataByTensor(const circle::Tensor *raw_tensor);
+ uint8_t *getConstDataByTensor(const circle::Tensor *raw_tensor);
+
+ // Prepare a graph input: allocate a buffer and return it, or register caller-provided data.
+ uint8_t *configureGraphInput(int32_t input_index);
+ void configureGraphInput(int32_t input_index, uint8_t *data);
+
+ // Element count times element size of the given graph input / output.
+ int32_t getInputDataSizeByIndex(int32_t input_index);
+ int32_t getOutputDataSizeByIndex(int32_t output_index);
+
+ // Buffer registered for the given graph output.
+ uint8_t *getOutputDataByIndex(int32_t output_index);
+
+ // Marks the operator at `index` as in-place for planning and execution.
+ void addInplaceOpIndex(uint32_t index) { _inplace_op_indexes.insert(index); }
+
+ // execute() configures the graph first when it is not valid.
+ void execute();
+ void configure();
+
+ void invalidate() { _is_valid = false; }
+ bool isValid() const { return _is_valid; }
+
+private:
+ void buildAllocDeallocPlan();
+ void allocate(size_t kernel_index);
+ void deallocate(size_t kernel_index);
+
+ void resetOutputTensorsData();
+
+private:
+ // Not owned.
+ SimpleMemoryManager *_memory_manager;
+ CircleReader *_reader;
+
+ // Buffer currently registered for each tensor.
+ std::unordered_map<const circle::Tensor *, uint8_t *> _tensor_to_data;
+ // Indexes of operators registered through addInplaceOpIndex().
+ std::unordered_set<uint32_t> _inplace_op_indexes;
+
+ // True once the allocation plan has been built.
+ bool _is_valid = false;
+
+ // _alloc_plan[i]: tensors allocated right before operator i is executed.
+ // _dealloc_plan[i]: tensors released right after operator i was executed.
+ std::vector<std::vector<const circle::Tensor *>> _alloc_plan;
+ std::vector<std::vector<const circle::Tensor *>> _dealloc_plan;
+};
+
+#endif // USE_STATIC_ALLOC
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+
+#include "core/RuntimeGraph.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+// Select the graph and memory-manager implementation at compile time.
+#ifdef USE_STATIC_ALLOC
+using BaseRuntimeGraph = StaticRuntimeGraph;
+using MemoryManager = StaticMemoryManager;
+#else
+using BaseRuntimeGraph = RuntimeGraph;
+using MemoryManager = SimpleMemoryManager;
+#endif // USE_STATIC_ALLOC
+
+// Owns the circle reader of a loaded model together with the runtime graphs that
+// read from it. The graph at position 0 is the main graph.
+class RuntimeModule
+{
+public:
+  RuntimeModule() = default;
+
+  // Reader shared by every graph of this module.
+  CircleReader &getCircleReader() { return _circle_reader; }
+
+  // Appends a graph that allocates through `memory_manager`.
+  // NOTE(review): growing the vector relocates existing graphs; confirm graphs are
+  // only added before any of them holds tensor buffers.
+  void addGraph(MemoryManager *memory_manager)
+  {
+    _graphs.emplace_back(memory_manager, &_circle_reader);
+  }
+
+  // Bounds-checked access to the graph at `pos`.
+  BaseRuntimeGraph *getRuntimeGraphAt(uint32_t pos) { return &_graphs.at(pos); }
+
+  // First graph of the module; at least one graph must have been added.
+  BaseRuntimeGraph *getMainGraph() const
+  {
+    const BaseRuntimeGraph &main_graph = _graphs[0];
+    return const_cast<BaseRuntimeGraph *>(&main_graph);
+  }
+
+  // Runs the main graph.
+  void execute() { getMainGraph()->execute(); }
+
+private:
+  std::vector<BaseRuntimeGraph> _graphs;
+
+  CircleReader _circle_reader;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
--- /dev/null
+# Static library with the circle model reader used by the micro interpreter.
+set(MICRO_READER_SOURCE
+  "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/reader/CircleMicroReader.h"
+  "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/reader/CircleMicroReaderHelper.h"
+  "CircleMicroReader.cpp"
+  "CircleMicroReaderHelper.cpp")
+
+add_library("luci_micro_circle_reader${READER_SUFFIX}" STATIC ${MICRO_READER_SOURCE})
+
+# The generated schema target is part of the reader's public interface.
+target_link_libraries("luci_micro_circle_reader${READER_SUFFIX}" PUBLIC luci_micro_circle_schema)
+
+target_include_directories("luci_micro_circle_reader${READER_SUFFIX}" PUBLIC
+  "${GENERATED_INCLUDE_DIR}"
+  "${LUCI_INTERPRETER_INCLUDE_DIR}")
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+#include "luci_interpreter/core/reader/CircleMicroReaderHelper.h"
+
+#include <algorithm>
+
+namespace luci_interpreter
+{
+
+// TODO check can we remove it
+// Maps a circle tensor type to the interpreter's DataType. Types without a
+// mapping (including COMPLEX64) assert and yield DataType::Unknown.
+DataType luci_datatype(const circle::TensorType type)
+{
+  switch (type)
+  {
+    // floating point
+    case circle::TensorType_FLOAT32:
+      return DataType::FLOAT32;
+    case circle::TensorType_FLOAT16:
+      return DataType::FLOAT16;
+    // signed integers
+    case circle::TensorType_INT8:
+      return DataType::S8;
+    case circle::TensorType_INT16:
+      return DataType::S16;
+    case circle::TensorType_INT32:
+      return DataType::S32;
+    case circle::TensorType_INT64:
+      return DataType::S64;
+    // unsigned integer and boolean
+    case circle::TensorType_UINT8:
+      return DataType::U8;
+    case circle::TensorType_BOOL:
+      return DataType::BOOL;
+    // unsupported
+    case circle::TensorType_COMPLEX64:
+    default:
+      break;
+  }
+
+  assert(false);
+  return DataType::Unknown;
+}
+
+// Maps a circle fused-activation type to the interpreter's FusedActFunc.
+// Unmapped values assert and yield FusedActFunc::UNDEFINED.
+FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)
+{
+  using Act = circle::ActivationFunctionType;
+
+  if (type == Act::ActivationFunctionType_NONE)
+    return FusedActFunc::NONE;
+  if (type == Act::ActivationFunctionType_RELU)
+    return FusedActFunc::RELU;
+  if (type == Act::ActivationFunctionType_RELU_N1_TO_1)
+    return FusedActFunc::RELU_N1_TO_1;
+  if (type == Act::ActivationFunctionType_RELU6)
+    return FusedActFunc::RELU6;
+  if (type == Act::ActivationFunctionType_TANH)
+    return FusedActFunc::TANH;
+  if (type == Act::ActivationFunctionType_SIGN_BIT)
+    return FusedActFunc::SIGN_BIT;
+
+  assert(false);
+  return FusedActFunc::UNDEFINED;
+}
+
+// Maps a circle padding mode to the interpreter's Padding.
+// Unmapped values assert and yield Padding::UNDEFINED.
+Padding luci_padding(const circle::Padding padding)
+{
+  if (padding == circle::Padding::Padding_SAME)
+    return Padding::SAME;
+  if (padding == circle::Padding::Padding_VALID)
+    return Padding::VALID;
+
+  assert(false);
+  return Padding::UNDEFINED;
+}
+
+// Maps a circle mirror-pad mode to the interpreter's MirrorPadMode.
+// Unmapped values assert and yield MirrorPadMode::UNDEFINED.
+MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode)
+{
+  if (mode == circle::MirrorPadMode::MirrorPadMode_REFLECT)
+    return MirrorPadMode::REFLECT;
+  if (mode == circle::MirrorPadMode::MirrorPadMode_SYMMETRIC)
+    return MirrorPadMode::SYMMETRIC;
+
+  assert(false);
+  return MirrorPadMode::UNDEFINED;
+}
+
+// Resolves the builtin operator of `op` through the model's operator-code table.
+circle::BuiltinOperator CircleReader::builtin_code(const circle::Operator *op) const
+{
+  assert(op != nullptr);
+
+  const auto code_table = opcodes();
+  const uint32_t code_index = op->opcode_index();
+  assert(code_index < code_table.size());
+
+  const auto code_entry = code_table[code_index];
+  assert(code_entry != nullptr);
+
+  return circle::builtin_code_neutral(code_entry);
+}
+
+// Remembers `model` for later direct pointer access; nothing is copied or
+// validated. Always returns true.
+bool CircleReader::parse(const circle::Model *model)
+{
+  assert(model != nullptr);
+  _model = model;
+  return true;
+}
+
+// Makes subgraph `sgindex` the current subgraph.
+// Returns false (and asserts) when the index is not below num_subgraph().
+bool CircleReader::select_subgraph(uint32_t sgindex)
+{
+  const bool out_of_range = num_subgraph() <= sgindex;
+  if (out_of_range)
+  {
+    assert(false);
+    return false;
+  }
+
+  // Keep a direct pointer to the selected subgraph.
+  auto all_subgraphs = _model->subgraphs();
+  assert(all_subgraphs != nullptr);
+  _current_subgraph = all_subgraphs->Get(sgindex);
+  assert(_current_subgraph != nullptr);
+
+  return true;
+}
+
+// Wraps a flatbuffers vector pointer. A null pointer is accepted; the wrapper
+// then reports size 0 (see size() / null()).
+template <typename T>
+VectorWrapper<T>::VectorWrapper(const flatbuffers::Vector<T> *ptr) : _vector(ptr)
+{
+ // Do nothing
+}
+
+// Number of elements; 0 when no vector is wrapped.
+template <typename T> uint32_t VectorWrapper<T>::size() const
+{
+  if (null())
+    return 0;
+
+  return _vector->size();
+}
+
+// Pointer to the first element; nullptr when no vector is wrapped.
+template <typename T> const T *VectorWrapper<T>::data() const
+{
+  if (null())
+    return nullptr;
+
+  return _vector->data();
+}
+
+// Iterator to the first element; a (nullptr, 0) iterator when no vector is wrapped.
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::begin() const
+{
+  if (null())
+    return iterator(nullptr, 0);
+
+  return _vector->begin();
+}
+
+// Past-the-end iterator; equal to begin() when no vector is wrapped.
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::end() const
+{
+  if (null())
+    return begin();
+
+  return _vector->end();
+}
+
+// Returns element `i`. An index at or beyond size() trips an assert; when asserts
+// are compiled out the access still goes through to the wrapped vector.
+template <typename T> typename VectorWrapper<T>::value_type VectorWrapper<T>::at(uint32_t i) const
+{
+  const bool out_of_range = i >= size();
+  if (out_of_range)
+  {
+    // TODO find better error message
+    assert(false && "Access to prohibited vector element");
+  }
+
+  return _vector->Get(i);
+}
+
+// Subscript access; same checks and result as at(i).
+template <typename T>
+typename VectorWrapper<T>::value_type VectorWrapper<T>::operator[](uint32_t i) const
+{
+  return this->at(i);
+}
+
+// True when no flatbuffers vector is wrapped.
+template <typename T> bool VectorWrapper<T>::null() const
+{
+  return nullptr == _vector;
+}
+// True when there are no elements (also the case when no vector is wrapped).
+template <typename T> bool VectorWrapper<T>::empty() const
+{
+  return 0 == size();
+}
+
+// The VectorWrapper member templates are defined in this .cpp file, so every
+// element type that is used must be explicitly instantiated here.
+#define REGISTER_WRAPPER(T) template class VectorWrapper<T>
+REGISTER_WRAPPER(flatbuffers::Offset<circle::SubGraph>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Buffer>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Tensor>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Operator>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::OperatorCode>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Metadata>);
+REGISTER_WRAPPER(int32_t);
+REGISTER_WRAPPER(uint8_t);
+#undef REGISTER_WRAPPER
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/reader/CircleMicroReaderHelper.h"
+
+namespace circle
+{
+
+// Resolves the effective builtin operator of `opcode` from its two code fields.
+//
+// A `deprecated_builtin_code` of 127 marks that the real value is stored in `builtin_code`;
+// any other value of the deprecated field is itself the operator code.
+// `opcode` must not be null (asserted).
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+  if (opcode->deprecated_builtin_code() == 127)
+  {
+    assert(opcode->builtin_code() >= 127);
+    return opcode->builtin_code();
+  }
+  // There was no 255(-1) value in v0.3
+  assert(opcode->deprecated_builtin_code() != -1);
+  return static_cast<::circle::BuiltinOperator>(opcode->deprecated_builtin_code());
+}
+
+// True when the `builtin_code` field of `opcode` lies inside the enum range
+// [BuiltinOperator_MIN, BuiltinOperator_MAX].
+bool is_valid(const ::circle::OperatorCode *opcode)
+{
+  ::circle::BuiltinOperator code = opcode->builtin_code();
+  return (::circle::BuiltinOperator_MIN <= code && code <= ::circle::BuiltinOperator_MAX);
+}
+
+// True when `opcode` denotes a custom operator.
+//
+// The code is resolved through builtin_code_neutral() so that an operator code which
+// carries its value in `deprecated_builtin_code` is classified correctly as well; reading
+// `builtin_code` alone ignores that field.
+bool is_custom(const ::circle::OperatorCode *opcode)
+{
+  const ::circle::BuiltinOperator code = builtin_code_neutral(opcode);
+  return (code == ::circle::BuiltinOperator_CUSTOM);
+}
+
+// Returns the enum name string for the element type of `tensor`.
+const char *tensor_type(const ::circle::Tensor *tensor)
+{
+  const auto type = tensor->type();
+  return ::circle::EnumNameTensorType(type);
+}
+
+} // namespace circle
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Add.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/add.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers the two operands as kernel inputs, `output` as the only output, and stores
+// the Add parameters.
+Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
+  : KernelWithParams<AddParams>({input1, input2}, {output}, params)
+{
+}
+
+// Validates the operand/output types and sizes the output tensor.
+void Add::configure()
+{
+ // Both inputs and the output must share one element type.
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ if (input1()->element_type() == DataType::S16)
+ {
+ // S16 path: each input carries exactly one zero point, and every zero point is 0.
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+ input2()->zero_points().size() == 1);
+ LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+ output()->zero_point() == 0);
+ }
+
+ // The output takes the shape computed by calculateShapeForBroadcast() from both inputs.
+ // TODO: enable it only if kernel with dynamic shapes
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+// Dispatches to the evaluation routine matching the element type of the first input.
+void Add::execute() const
+{
+  const auto type = input1()->element_type();
+
+  if (type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (type == DataType::S64)
+  {
+    evalInteger<int64_t>();
+  }
+  else if (type == DataType::S32)
+  {
+    evalInteger<int32_t>();
+  }
+  else if (type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else if (type == DataType::S16)
+  {
+    evalQuantizedS16();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float32 addition.
+// Uses the broadcasting reference kernel when ProcessBroadcastShapes() reports that the
+// two input shapes need it, and the plain element-wise kernel otherwise.
+void Add::evalFloat() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<float>(params, _params.activation);
+
+  // `params` is handed over by address together with both input shapes.
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastAdd4DSlow(
+      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()),
+                               getTensorShape(input2()), getTensorData<float>(input2()),
+                               getTensorShape(output()), getTensorData<float>(output()));
+  }
+}
+
+// Integer addition for element type T (execute() instantiates it for int32_t and int64_t).
+// Uses the broadcasting reference kernel when ProcessBroadcastShapes() reports that the
+// two input shapes need it, and the plain element-wise kernel otherwise.
+template <typename T> void Add::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  // `params` is handed over by address together with both input shapes.
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastAdd4DSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
+// Quantized uint8 addition.
+// Derives fixed-point multipliers/shifts for both inputs and the output from the tensor
+// scales, fills tflite::ArithmeticParams with them, and runs the reference Add kernel
+// (the broadcasting variant when the shapes require it).
+void Add::evalQuantized() const
+{
+  const auto input1_scale = static_cast<double>(input1()->scale());
+  const auto input2_scale = static_cast<double>(input2()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  // Both inputs are rescaled relative to twice the larger input scale; the output
+  // multiplier additionally compensates the fixed left shift applied to the inputs.
+  const int left_shift = 20;
+  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+  int input1_shift{}, input2_shift{}, output_shift{};
+  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ArithmeticParams params{};
+  params.left_shift = left_shift;
+  // The kernel expects inputs' zero points to be negated.
+  params.input1_offset = -input1()->zero_point(); // Note the '-'.
+  params.input1_multiplier = input1_multiplier;
+  params.input1_shift = input1_shift;
+  params.input2_offset = -input2()->zero_point(); // Note the '-'.
+  params.input2_multiplier = input2_multiplier;
+  params.input2_shift = input2_shift;
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  // `params` is handed over by address together with both input shapes.
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastAdd4DSlow(
+      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+                               getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+}
+
+// Quantized int16 addition.
+// Derives fixed-point multipliers/shifts from the tensor scales and applies them per
+// element through BinaryOpBroadcastSlow(). configure() requires all zero points to be 0
+// for this type, so no offsets are applied here.
+void Add::evalQuantizedS16() const
+{
+  const auto input1_scale = static_cast<double>(input1()->scale());
+  const auto input2_scale = static_cast<double>(input2()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  constexpr int left_shift = 12;
+  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+  int input1_shift{}, input2_shift{}, output_shift{};
+  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  auto fn = [input1_multiplier, input1_shift, //
+             input2_multiplier, input2_shift, //
+             output_multiplier, output_shift, //
+             activation_min, activation_max](int16_t input1_val, int16_t input2_val) {
+    // Scale by 2^left_shift with a multiplication: the operands may be negative, and
+    // left-shifting a negative signed value is undefined behaviour before C++20.
+    // An int16 value times 2^12 always fits into int32.
+    constexpr int32_t shift_factor = 1 << left_shift;
+    const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) * shift_factor;
+    const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) * shift_factor;
+    const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+      shifted_input1_val, input1_multiplier, input1_shift);
+    const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+      shifted_input2_val, input2_multiplier, input2_shift);
+    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+    const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+      raw_sum, output_multiplier, output_shift);
+    // Clamp to the activation range before narrowing back to int16.
+    const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
+    return static_cast<int16_t>(clamped_output);
+  };
+
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+                        getTensorShape(input2()), getTensorData<int16_t>(input2()),
+                        getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ArgMax.h"
+#include "kernels/Utils.h"
+#include "PALArgMax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `input` and `axis` as kernel inputs, `output` as the only output, and stores
+// the ArgMax parameters.
+ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params)
+  : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
+{
+}
+
+// Validates the axis tensor and sizes the output: the output shape is the input shape
+// with the reduced axis removed.
+void ArgMax::configure()
+{
+  assert(axis()->element_type() == DataType::S32 || axis()->element_type() == DataType::S64);
+  assert(input()->shape().num_dims() >= 1);
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+  Shape output_shape(num_dims - 1);
+
+  assert(axis()->shape().num_elements() == 1);
+  // Read the axis with the element width it is actually stored in; execute() makes the
+  // same S32/S64 distinction.
+  int64_t axis_value = (axis()->element_type() == DataType::S64)
+                         ? getTensorData<int64_t>(axis())[0]
+                         : static_cast<int64_t>(getTensorData<int32_t>(axis())[0]);
+
+  // If axis value is negative, then update by adding input_shape's num_dims.
+  // If updated value also negative, then assert.
+  if (axis_value < 0)
+    axis_value = axis_value + num_dims;
+  assert(axis_value >= 0);
+  // An axis at or beyond the rank would skip nothing in the loop below and write
+  // num_dims entries into an output shape that only has num_dims - 1.
+  assert(axis_value < num_dims);
+
+  // Copy every input dimension except the reduced one.
+  int j = 0;
+  for (int i = 0; i < num_dims; i++)
+  {
+    if (i == axis_value)
+      continue;
+    output_shape.dim(j++) = input_shape.dim(i);
+  }
+
+  assert(output()->element_type() == _params.output_type);
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(output_shape);
+}
+
+// Runs ArgMinMax with a std::greater comparator, selecting the template arguments from
+// three runtime properties: the axis element type (S32, otherwise treated as 64-bit),
+// the configured output type (S32 or S64) and the input element type (FLOAT32 or U8).
+// Any other output or input type trips an assertion.
+void ArgMax::execute() const
+{
+
+// Helper that expands to one ArgMinMax call for a concrete (data, axis, output) type triple.
+#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
+ luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
+ getTensorData<axis_type>(axis()), getTensorShape(output()), \
+ getTensorData<output_type>(output()), std::greater<data_type>())
+ if (axis()->element_type() == DataType::S32)
+ {
+ // 32-bit axis tensor
+ switch (_params.output_type)
+ {
+ case DataType::S32:
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_ARG_MAX(float, int32_t, int32_t);
+ break;
+ case DataType::U8:
+ TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t);
+ break;
+ default:
+ assert(false && "Unsupported input type.");
+ }
+ break;
+ case DataType::S64:
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_ARG_MAX(float, int32_t, int64_t);
+ break;
+ case DataType::U8:
+ TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t);
+ break;
+ default:
+ assert(false && "Unsupported input type.");
+ }
+ break;
+ default:
+ assert(false && "Unsupported output type.");
+ }
+ }
+ else
+ {
+ // Any non-S32 axis is read as int64_t here
+ switch (_params.output_type)
+ {
+ case DataType::S32:
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_ARG_MAX(float, int64_t, int32_t);
+ break;
+ case DataType::U8:
+ TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t);
+ break;
+ default:
+ assert(false && "Unsupported input type.");
+ }
+ break;
+ case DataType::S64:
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_ARG_MAX(float, int64_t, int64_t);
+ break;
+ case DataType::U8:
+ TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t);
+ break;
+ default:
+ assert(false && "Unsupported input type.");
+ }
+ break;
+ default:
+ assert(false && "Unsupported output type.");
+ }
+ }
+#undef TF_LITE_ARG_MAX
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/AveragePool2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALAveragePool2d.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `input` as the kernel input, `output` and `scratchpad` as its outputs
+// (in that order), and stores the pooling parameters.
+AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+                             const Pool2DParams &params)
+  : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params)
+{
+}
+
+// Validates the input, derives output size and padding, checks quantization parameters
+// for the quantized types, and sizes the output and scratchpad tensors.
+void AveragePool2D::configure()
+{
+  if (input()->element_type() != output()->element_type())
+  {
+    assert(false && "Input Tensor and Output Tensor Type must be same");
+  }
+  if (input()->shape().num_dims() != 4)
+  {
+    assert(false && "Input Tensor Shape must be 4-D");
+  }
+
+  // The four dimensions are read as batches, height, width, depth.
+  const Shape &in_shape = input()->shape();
+  const int32_t batches = in_shape.dim(0);
+  const int32_t in_height = in_shape.dim(1);
+  const int32_t in_width = in_shape.dim(2);
+  const int32_t depth = in_shape.dim(3);
+
+  const int32_t out_height =
+    computeOutputSize(_params.padding, in_height, _params.filter_height, _params.stride_height);
+  const int32_t out_width =
+    computeOutputSize(_params.padding, in_width, _params.filter_width, _params.stride_width);
+
+  _padding_height =
+    computePadding(_params.stride_height, 1, in_height, _params.filter_height, out_height);
+  _padding_width =
+    computePadding(_params.stride_width, 1, in_width, _params.filter_width, out_width);
+
+  // Quantized types need matching scales; U8 and S8 also need equal zero points, while
+  // S16 needs both zero points to be 0.
+  switch (input()->element_type())
+  {
+    case DataType::U8:
+    case DataType::S8:
+      LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+      LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+      break;
+    case DataType::S16:
+      LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+      LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+      break;
+    default:
+      break;
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize({batches, out_height, out_width, depth});
+
+  // The scratchpad is the second registered output tensor.
+  auto scratch = getOutputTensors()[1];
+  luci_interpreter_pal::SetupScratchpadTensor(scratch, input()->element_type(),
+                                              getTensorShape(input()), getTensorShape(output()));
+}
+
+// Dispatches to the evaluation routine matching the input element type.
+void AveragePool2D::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      return;
+    case DataType::U8:
+      evalQuantized();
+      return;
+    case DataType::S16:
+      evalSInt16();
+      return;
+    case DataType::S8:
+      evalSInt8();
+      return;
+    default:
+      break;
+  }
+
+  assert(false && "Unsupported type.");
+}
+
+// Float32 average pooling via the TFLite reference kernel.
+void AveragePool2D::evalFloat() const
+{
+  float act_min{};
+  float act_max{};
+  calculateActivationRange(_params.activation, &act_min, &act_max);
+
+  // Filter/stride come from the kernel parameters, padding from configure().
+  tflite::PoolParams pool{};
+  pool.filter_height = _params.filter_height;
+  pool.filter_width = _params.filter_width;
+  pool.stride_height = _params.stride_height;
+  pool.stride_width = _params.stride_width;
+  pool.padding_values.height = _padding_height;
+  pool.padding_values.width = _padding_width;
+  pool.float_activation_min = act_min;
+  pool.float_activation_max = act_max;
+
+  tflite::reference_ops::AveragePool(pool, getTensorShape(input()), getTensorData<float>(input()),
+                                     getTensorShape(output()), getTensorData<float>(output()));
+}
+
+// Quantized uint8 average pooling via the TFLite reference kernel.
+void AveragePool2D::evalQuantized() const
+{
+  int32_t act_min{};
+  int32_t act_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &act_min, &act_max);
+
+  // Filter/stride come from the kernel parameters, padding from configure().
+  tflite::PoolParams pool{};
+  pool.filter_height = _params.filter_height;
+  pool.filter_width = _params.filter_width;
+  pool.stride_height = _params.stride_height;
+  pool.stride_width = _params.stride_width;
+  pool.padding_values.height = _padding_height;
+  pool.padding_values.width = _padding_width;
+  pool.quantized_activation_min = act_min;
+  pool.quantized_activation_max = act_max;
+
+  tflite::reference_ops::AveragePool(pool, getTensorShape(input()),
+                                     getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                     getTensorData<uint8_t>(output()));
+}
+
+// Quantized int8 average pooling through the platform abstraction layer, which also
+// receives the scratchpad tensor's shape and (when available) its buffer.
+void AveragePool2D::evalSInt8() const
+{
+  int32_t act_min{};
+  int32_t act_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &act_min, &act_max);
+
+  // Filter/stride come from the kernel parameters, padding from configure().
+  tflite::PoolParams pool{};
+  pool.filter_height = _params.filter_height;
+  pool.filter_width = _params.filter_width;
+  pool.stride_height = _params.stride_height;
+  pool.stride_width = _params.stride_width;
+  pool.padding_values.height = _padding_height;
+  pool.padding_values.width = _padding_width;
+  pool.quantized_activation_min = act_min;
+  pool.quantized_activation_max = act_max;
+
+  // The scratchpad is the second registered output; its buffer is only fetched when the
+  // tensor reports itself as allocatable, otherwise a null pointer is passed on.
+  auto scratch = getOutputTensors()[1];
+  int8_t *scratch_buffer = scratch->is_allocatable() ? scratch->data<int8_t>() : nullptr;
+
+  luci_interpreter_pal::AveragePool<int8_t>(
+    pool, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorShape(scratch), scratch_buffer);
+}
+
+// Quantized int16 average pooling via the TFLite integer reference kernel.
+void AveragePool2D::evalSInt16() const
+{
+  int32_t act_min{};
+  int32_t act_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &act_min, &act_max);
+
+  // Filter/stride come from the kernel parameters, padding from configure().
+  tflite::PoolParams pool{};
+  pool.filter_height = _params.filter_height;
+  pool.filter_width = _params.filter_width;
+  pool.stride_height = _params.stride_height;
+  pool.stride_width = _params.stride_width;
+  pool.padding_values.height = _padding_height;
+  pool.padding_values.width = _padding_width;
+  pool.quantized_activation_min = act_min;
+  pool.quantized_activation_max = act_max;
+
+  tflite::reference_integer_ops::AveragePool(
+    pool, getTensorShape(input()), getTensorData<int16_t>(input()), //
+    getTensorShape(output()), getTensorData<int16_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchMatMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+namespace
+{
+
+// Returns a copy of `shape` whose last two dimensions are exchanged.
+tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape)
+{
+  const int32_t rank = shape.DimensionsCount();
+  const int32_t row_axis = rank - 2;
+  const int32_t col_axis = rank - 1;
+
+  tflite::RuntimeShape result(shape);
+  result.SetDim(row_axis, shape.Dims(col_axis));
+  result.SetDim(col_axis, shape.Dims(row_axis));
+  return result;
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `x` and `y` as kernel inputs; `output` plus the two temporaries `x_tmp` and
+// `y_tmp` are registered as outputs (in that order). Also stores the parameters.
+BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp,
+                         Tensor *y_tmp, const BatchMatMulParams &params)
+  : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params)
+{
+}
+
+// Validates operand types, ranks and shapes, sets up the two scratchpad tensors, and
+// sizes the output: broadcast batch dimensions followed by the matmul result dimensions.
+void BatchMatMul::configure()
+{
+ auto lhs = x();
+ auto rhs = y();
+ auto adj_x = params().adj_x;
+ auto adj_y = params().adj_y;
+
+ // TODO Support non-float types
+ if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
+ assert(false && "Unsupported type.");
+
+ LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type());
+
+ // Both operands must have between 2 and 4 dimensions.
+ auto lhs_rank = lhs->shape().num_dims();
+ auto rhs_rank = rhs->shape().num_dims();
+ LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4);
+ LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
+
+ auto lhs_scratchpad = temp_lhs();
+ auto rhs_scratchpad = temp_rhs();
+ luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs),
+ getTensorShape(rhs));
+
+ auto output_rank = std::max(lhs_rank, rhs_rank);
+
+ // Bring both shapes to the output rank before comparing them dimension by dimension.
+ auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs));
+ auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
+
+ // Ensure any batch dimensions obey broadcasting rules.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ if (lhs_dim != rhs_dim)
+ {
+ // Differing batch dimensions are only accepted when one of them is 1.
+ if (lhs_dim != 1)
+ {
+ LUCI_INTERPRETER_CHECK(rhs_dim == 1);
+ }
+ }
+ }
+
+ // Ensure other dimensions work for matrix multiplication.
+ // adj_x / adj_y select which of the last two dimensions is the accumulation dimension.
+ int accum_dim_lhs =
+ adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1);
+ int accum_dim_rhs =
+ adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2);
+ LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
+
+ Shape output_shape(output_rank);
+ // Fill in any broadcast dimensions.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ // Take the lhs dimension unless it is the broadcast one (1) and rhs differs.
+ int broadcast_dim = lhs_dim;
+ if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
+ {
+ broadcast_dim = rhs_dim;
+ }
+ output_shape.dim(i) = broadcast_dim;
+ }
+ // Fill in the matmul dimensions.
+ int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
+ int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
+
+ output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
+ output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
+
+ output()->resize(output_shape);
+}
+
+// Writes `tensor_in` with its last two dimensions transposed into `tensor_out`.
+// Only FLOAT32 tensors are handled; any other element type trips an assertion.
+void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out)
+{
+  const tflite::RuntimeShape in_shape(getTensorShape(tensor_in));
+  const int rank = in_shape.DimensionsCount();
+  const int row_axis = rank - 2;
+  const int col_axis = rank - 1;
+
+  // Output shape: same as the input with the last two extents exchanged.
+  tflite::RuntimeShape out_shape(in_shape);
+  out_shape.SetDim(col_axis, in_shape.Dims(row_axis));
+  out_shape.SetDim(row_axis, in_shape.Dims(col_axis));
+
+  // Identity permutation for the leading axes, swap for the last two.
+  tflite::TransposeParams transpose_params;
+  transpose_params.perm_count = rank;
+  for (int axis = 0; axis < row_axis; ++axis)
+  {
+    transpose_params.perm[axis] = axis;
+  }
+  transpose_params.perm[row_axis] = col_axis;
+  transpose_params.perm[col_axis] = row_axis;
+
+  if (tensor_in->element_type() == DataType::FLOAT32)
+  {
+    tflite::reference_ops::Transpose(transpose_params, in_shape, getTensorData<float>(tensor_in),
+                                     out_shape, getTensorData<float>(tensor_out));
+  }
+  else
+  {
+    assert(false && "Only suppport fp32 BatchMatMul for now.");
+  }
+}
+
+// Runs the batched matrix multiplication.
+// The rhs operand is transposed into its temporary unless adj_y is set, and the lhs
+// operand is transposed into its temporary when adj_x is set; the PAL kernel is then
+// called with the rhs operand first and the lhs operand second.
+void BatchMatMul::execute() const
+{
+  auto lhs = x();
+  auto rhs = y();
+
+  const bool transpose_lhs = params().adj_x;
+  const bool transpose_rhs = !params().adj_y;
+
+  if (transpose_rhs)
+  {
+    TransposeRowsColumns(rhs, temp_rhs());
+  }
+  if (transpose_lhs)
+  {
+    TransposeRowsColumns(lhs, temp_lhs());
+  }
+
+  // Operands actually fed to the kernel: the temporary when a transpose was performed.
+  auto rhs_tensor = transpose_rhs ? temp_rhs() : rhs;
+  auto lhs_tensor = transpose_lhs ? temp_lhs() : lhs;
+
+  // Shapes passed along with them: rhs swapped unless adj_y, lhs swapped unless adj_x.
+  const auto original_rhs_shape = getTensorShape(rhs);
+  const auto original_lhs_shape = getTensorShape(lhs);
+  tflite::RuntimeShape rhs_shape =
+    transpose_rhs ? SwapRowColumnDims(original_rhs_shape) : original_rhs_shape;
+  tflite::RuntimeShape lhs_shape =
+    transpose_lhs ? original_lhs_shape : SwapRowColumnDims(original_lhs_shape);
+
+  if (x()->element_type() == DataType::FLOAT32)
+  {
+    luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape,
+                                      getTensorData<float>(lhs_tensor), getTensorShape(output()),
+                                      getTensorData<float>(output()));
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchToSpaceND.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+namespace
+{
+// Inclusive bounds on the input rank accepted by BatchToSpaceND::configure().
+constexpr int kInputMinDimensionNum = 3;
+constexpr int kInputMaxDimensionNum = 4;
+} // namespace
+
+// Registers `input`, `block_shape` and `crops` as the kernel inputs (in that order) and
+// `output` as its only output.
+BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output)
+ : Kernel({input, block_shape, crops}, {output})
+{
+}
+
+// Validates input rank, block_shape and crops, then sizes the output tensor:
+// the batch is divided by every block size, each spatial dimension is multiplied by its
+// block size minus both crops, and the last dimension is copied from the input.
+void BatchToSpaceND::configure()
+{
+  const auto *block_shape_data = block_shape()->data<int32_t>();
+  const auto *crops_data = crops()->data<int32_t>();
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  // Every dimension except the first and the last is treated as spatial.
+  int spatial_dims_num = input()->shape().num_dims() - 2;
+
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+  LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
+  LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
+  for (int i = 0; i < spatial_dims_num * 2; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
+  }
+
+  Shape output_shape = Shape(input()->shape().num_dims());
+  int output_batch_size = input()->shape().dim(0);
+  for (int i = 0; i < spatial_dims_num; ++i)
+  {
+    // Reject non-positive block sizes up front: a zero would make the modulo and the
+    // division below undefined.
+    LUCI_INTERPRETER_CHECK(block_shape_data[i] > 0);
+    LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
+    output_batch_size = output_batch_size / block_shape_data[i];
+    output_shape.dim(i + 1) =
+      input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
+  }
+
+  output_shape.dim(0) = output_batch_size;
+  output_shape.dim(input()->shape().num_dims() - 1) =
+    input()->shape().dim(input()->shape().num_dims() - 1);
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(output_shape);
+}
+
+// Runs the PAL BatchToSpaceND kernel for FLOAT32 or U8 inputs; any other element type
+// trips an assertion.
+void BatchToSpaceND::execute() const
+{
+  const auto element_type = input()->element_type();
+
+  if (element_type == DataType::FLOAT32)
+  {
+    luci_interpreter_pal::BatchToSpaceND(
+      getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
+      getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+      getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else if (element_type == DataType::U8)
+  {
+    luci_interpreter_pal::BatchToSpaceND(
+      getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
+      getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+      getTensorData<int32_t>(crops()), getTensorShape(output()),
+      getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NODES_BUILDERS_H
+#define LUCI_INTERPRETER_KERNELS_NODES_BUILDERS_H
+
+#include "KernelBuilder.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+#include "core/RuntimeGraph.h"
+
+namespace luci_interpreter
+{
+
+// For every REGISTER_KERNEL(builtin_operator, name) entry of the included list file,
+// declare the pair of free functions configure_kernel_Circle<name> and
+// execute_kernel_Circle<name>. The builtin_operator argument is not used by this expansion.
+#define REGISTER_KERNEL(builtin_operator, name) \
+ void configure_kernel_Circle##name(const circle::Operator *cur_op, \
+ BaseRuntimeGraph *runtime_graph); \
+ \
+ void execute_kernel_Circle##name(const circle::Operator *cur_op, \
+ BaseRuntimeGraph *runtime_graph, bool is_inplace);
+
+// The kernel list comes either from a generated file or from the static default list.
+#if USE_GENERATED_LIST
+#include "GeneratedKernelsToBuild.lst"
+#else
+#include "KernelsToBuild.lst"
+#endif
+
+// Keep the helper macro local to this header.
+#undef REGISTER_KERNEL
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NODES_BUILDERS_H
--- /dev/null
+# Sources that are always part of the kernels library.
+set(SOURCES
+        BinaryOpCommon.h
+        Utils.h
+        Utils.cpp
+        Builders.h
+        KernelBuilder.h
+        KernelBuilder.cpp)
+
+# Each REGISTER_KERNEL(<operator>, <node>) entry of the register file contributes
+# <node>.cpp to the library. CMake separates arguments by whitespace, so the formal
+# parameters are listed without a comma (a comma would become part of the parameter name).
+macro(REGISTER_KERNEL OPERATOR NODE)
+  list(APPEND SOURCES "${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
+
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
+
+add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
+
+# Everything below only concerns the unit tests.
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+# Redefine the macro so that the same register file now collects <node>.test.cpp.
+macro(REGISTER_KERNEL OPERATOR NODE)
+  list(APPEND TEST_SOURCES "${NODE}.test.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
+
+GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS})
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/Utils.h"
+
+namespace
+{
+
+using namespace luci_interpreter;
+using namespace luci_interpreter::kernels;
+
+// Converts `elements_count` consecutive values starting at `in_data` with
+// static_cast<OutT> and stores them, in order, starting at `out_data`.
+template <typename InT, typename OutT>
+void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count)
+{
+  for (uint32_t idx = 0; idx < elements_count; ++idx)
+  {
+    out_data[idx] = static_cast<OutT>(in_data[idx]);
+  }
+}
+
+// Converts the values at `in_data` to the element type of `out_tensor` and stores them in
+// the tensor's data. The number of converted elements is the element count of the output
+// shape. An output type without a case below only trips the assert.
+template <typename InT> void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor)
+{
+  const auto target_type = out_tensor->element_type();
+  const auto count = out_tensor->shape().num_elements();
+
+  switch (target_type)
+  {
+    case DataType::U8:
+      cast_data(in_data, getTensorData<uint8_t>(out_tensor), count);
+      return;
+    case DataType::U16:
+      cast_data(in_data, getTensorData<uint16_t>(out_tensor), count);
+      return;
+    case DataType::U32:
+      cast_data(in_data, getTensorData<uint32_t>(out_tensor), count);
+      return;
+    case DataType::U64:
+      cast_data(in_data, getTensorData<uint64_t>(out_tensor), count);
+      return;
+    case DataType::S8:
+      cast_data(in_data, getTensorData<int8_t>(out_tensor), count);
+      return;
+    case DataType::S16:
+      cast_data(in_data, getTensorData<int16_t>(out_tensor), count);
+      return;
+    case DataType::S32:
+      cast_data(in_data, getTensorData<int32_t>(out_tensor), count);
+      return;
+    case DataType::S64:
+      cast_data(in_data, getTensorData<int64_t>(out_tensor), count);
+      return;
+    case DataType::FLOAT32:
+      cast_data(in_data, getTensorData<float>(out_tensor), count);
+      return;
+    case DataType::BOOL:
+      cast_data(in_data, getTensorData<bool>(out_tensor), count);
+      return;
+    default:
+      assert(false && "Unsupported output type.");
+  }
+}
+
+// Dispatches on the element type of `in_tensor` and forwards its typed data to
+// cast_from_pointer_to_tensor, which handles the output side of the conversion.
+// An input type without a case below only trips the assert.
+void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor)
+{
+  const auto source_type = in_tensor->element_type();
+
+  switch (source_type)
+  {
+    case DataType::U8:
+      cast_from_pointer_to_tensor(getTensorData<uint8_t>(in_tensor), out_tensor);
+      return;
+    case DataType::U16:
+      cast_from_pointer_to_tensor(getTensorData<uint16_t>(in_tensor), out_tensor);
+      return;
+    case DataType::U32:
+      cast_from_pointer_to_tensor(getTensorData<uint32_t>(in_tensor), out_tensor);
+      return;
+    case DataType::U64:
+      cast_from_pointer_to_tensor(getTensorData<uint64_t>(in_tensor), out_tensor);
+      return;
+    case DataType::S8:
+      cast_from_pointer_to_tensor(getTensorData<int8_t>(in_tensor), out_tensor);
+      return;
+    case DataType::S16:
+      cast_from_pointer_to_tensor(getTensorData<int16_t>(in_tensor), out_tensor);
+      return;
+    case DataType::S32:
+      cast_from_pointer_to_tensor(getTensorData<int32_t>(in_tensor), out_tensor);
+      return;
+    case DataType::S64:
+      cast_from_pointer_to_tensor(getTensorData<int64_t>(in_tensor), out_tensor);
+      return;
+    case DataType::FLOAT32:
+      cast_from_pointer_to_tensor(getTensorData<float>(in_tensor), out_tensor);
+      return;
+    case DataType::BOOL:
+      cast_from_pointer_to_tensor(getTensorData<bool>(in_tensor), out_tensor);
+      return;
+    default:
+      assert(false && "Unsupported input type.");
+  }
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// A Cast kernel has exactly one input tensor and one output tensor.
+Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Rejects tensors whose element type is Unknown and resizes the output to the input shape.
+void Cast::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() != DataType::Unknown);
+  LUCI_INTERPRETER_CHECK(output()->element_type() != DataType::Unknown);
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+// Converts every input element to the output element type.
+void Cast::execute() const
+{
+  // Both tensors must hold the same number of elements.
+  assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+  cast_from_tensor_to_tensor(input(), output());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/concatenation.h>
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+// Concatenates the data of all inputs of `cur_op` along the axis stored in its
+// ConcatenationOptions and writes the result to the operator's first output.
+// T is the element type of the tensors; the trailing bool parameter is not used.
+template <typename T>
+void evalGeneric(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph, bool)
+{
+  const auto output_index = cur_op->outputs()->operator[](0);
+  assert(output_index != -1);
+
+  auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  // A negative axis is counted from the end of the output dimensions.
+  const auto *concat_options = cur_op->builtin_options_as_ConcatenationOptions();
+  int axis = concat_options->axis();
+  if (axis < 0)
+    axis += Tensor::num_dims(output);
+
+  const auto num_inputs = cur_op->inputs()->size();
+
+  std::vector<const T *> input_ptrs;
+  std::vector<tflite::RuntimeShape> input_shapes;
+  std::vector<tflite::RuntimeShape *> input_shape_ptrs;
+
+  input_ptrs.reserve(num_inputs);
+  input_shapes.reserve(num_inputs);
+  input_shape_ptrs.reserve(num_inputs);
+
+  for (int32_t i = 0; i < num_inputs; ++i)
+  {
+    const auto *tensor = runtime_graph->getCircleTensorByIndex(cur_op->inputs()->operator[](i));
+
+    input_ptrs.push_back(reinterpret_cast<const T *>(runtime_graph->getDataByTensor(tensor)));
+    input_shapes.push_back(kernels::getTensorShape(tensor));
+  }
+
+  // The reference kernel takes an array of shape pointers; collect them once every
+  // shape has been stored.
+  for (auto &shape : input_shapes)
+    input_shape_ptrs.push_back(&shape);
+
+  auto *output_data = reinterpret_cast<T *>(runtime_graph->getDataByTensor(output));
+
+  tflite::ConcatenationParams concat_params{};
+  concat_params.axis = axis;
+  concat_params.inputs_count = num_inputs;
+  tflite::reference_ops::Concatenation(concat_params, input_shape_ptrs.data(), input_ptrs.data(),
+                                       kernels::getTensorShape(output), output_data);
+}
+
+} // namespace
+
+/*
+ * Validates a Concatenation operator. The checks are:
+ *  - the operator has at least one input;
+ *  - no fused activation function is requested;
+ *  - the (possibly negative) axis addresses a dimension of the first input;
+ *  - every further input has the element type and rank of the first input and the same
+ *    extent in every dimension except the concatenation axis;
+ *  - unless DIS_QUANT is defined, every S8 input (the first one included) has the same
+ *    quantized dimension, zero points and scales as the output.
+ * A violated condition fails the corresponding LUCI_INTERPRETER_CHECK.
+ */
+void configure_kernel_CircleConcatenation(const circle::Operator *cur_op,
+                                          BaseRuntimeGraph *runtime_graph)
+{
+  const int num_inputs = cur_op->inputs()->size();
+  LUCI_INTERPRETER_CHECK(num_inputs > 0);
+
+  auto input_index = cur_op->inputs()->operator[](0);
+  auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto *t0 = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto *output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  const auto *params = cur_op->builtin_options_as_ConcatenationOptions();
+
+  // TODO: Support concat with fused activation function
+  LUCI_INTERPRETER_CHECK(luci_actfunc(params->fused_activation_function()) == FusedActFunc::NONE);
+
+  const auto rank = Tensor::num_dims(t0);
+
+  // A negative axis is counted from the end of the first input's dimensions.
+  int axis = params->axis();
+  if (axis < 0)
+    axis += rank;
+  LUCI_INTERPRETER_CHECK(axis >= 0 && axis < rank);
+
+  // Every other input must agree with the first one in type, rank and all extents
+  // except the one along the concatenation axis.
+  for (int i = 1; i < num_inputs; ++i)
+  {
+    input_index = cur_op->inputs()->operator[](i);
+    const auto *tensor = runtime_graph->getCircleTensorByIndex(input_index);
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(tensor) == Tensor::element_type(t0));
+    LUCI_INTERPRETER_CHECK(Tensor::num_dims(tensor) == rank);
+    for (int d = 0; d < rank; ++d)
+    {
+      if (d == axis)
+        continue; // extents along the concatenation axis may differ
+      LUCI_INTERPRETER_CHECK(Tensor::dim(tensor, d) == Tensor::dim(t0, d));
+    }
+  }
+
+#ifndef DIS_QUANT
+  // If input tensors are INT8 type then quantization parameters of all input tensors and the
+  // output should be the same. The loop starts at 0 so that the first input is checked too.
+  for (int i = 0; i < num_inputs; ++i)
+  {
+    input_index = cur_op->inputs()->operator[](i);
+    const auto *tensor = runtime_graph->getCircleTensorByIndex(input_index);
+    if (Tensor::element_type(tensor) == DataType::S8)
+    {
+      LUCI_INTERPRETER_CHECK(Tensor::quantized_dimension(tensor) ==
+                             Tensor::quantized_dimension(output));
+
+      LUCI_INTERPRETER_CHECK(Tensor::zero_points(tensor).size() == Tensor::scales(tensor).size());
+      LUCI_INTERPRETER_CHECK(Tensor::zero_points(tensor) == Tensor::zero_points(output));
+      LUCI_INTERPRETER_CHECK(Tensor::scales(tensor) == Tensor::scales(output));
+    }
+  }
+#endif // DIS_QUANT
+}
+
+// Executes Concatenation for `cur_op`: picks the evalGeneric instantiation that matches the
+// element type of the first input. Types without a case only trip the assert.
+void execute_kernel_CircleConcatenation(const circle::Operator *cur_op,
+                                        BaseRuntimeGraph *runtime_graph, bool is_inplace)
+{
+  const int input_count = cur_op->inputs()->size();
+  LUCI_INTERPRETER_CHECK(input_count > 0);
+
+  const auto first_index = cur_op->inputs()->operator[](0);
+  assert(first_index != -1);
+  const auto *first_input = runtime_graph->getCircleTensorByIndex(first_index);
+
+  const auto data_type = Tensor::element_type(first_input);
+  switch (data_type)
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      evalGeneric<float>(cur_op, runtime_graph, is_inplace);
+      return;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::S8:
+      evalGeneric<int8_t>(cur_op, runtime_graph, is_inplace);
+      return;
+    case DataType::S32:
+      evalGeneric<int32_t>(cur_op, runtime_graph, is_inplace);
+      return;
+    case DataType::S64:
+      evalGeneric<int64_t>(cur_op, runtime_graph, is_inplace);
+      return;
+#endif // DIS_QUANT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// TODO: enable these tests (everything below is currently compiled out by "#if 0")
+#if 0
+#include "kernels/Concatenation.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ConcatenationTest : public ::testing::Test // fixture shared by the tests below
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager; // created in SetUp(), used to allocate tensors
+};
+
+TEST_F(ConcatenationTest, Float) // two 2x3 float inputs, concatenated along axis 0, -2, 1 and -1
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ // Try different 'axis' and expect different results.
+ {
+ params.axis = 0;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ kernel.configure();
+ for (auto t : kernel.getOutputTensors()) // allocate every output reported by the kernel
+ {
+ _memory_manager->allocate_memory(*t);
+ }
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
+ }
+ {
+ params.axis = -2; // Same as '0'.
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
+ }
+ {
+ params.axis = 1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
+ }
+ {
+ params.axis = -1; // Same as '1'.
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
+ }
+}
+
+TEST_F(ConcatenationTest, Input_Number_Check_NEG) // configure() must throw for an empty input list
+{
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Invalid_Axis_NEG) // configure() must throw for axis -3 on 2-D inputs
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -3;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG) // FLOAT32 and U8 inputs: configure() must throw
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG) // 2-D and 3-D inputs: configure() must throw
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG) // {2, 3} vs {3, 3} with axis -1: configure() must throw
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG) // U8 and S8 inputs: configure() must throw
+{
+ std::vector<uint8_t> input1_data{1, 2, 3, 4};
+ std::vector<int8_t> input2_data{5, 6, 7, 8};
+ Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG) // configure() must throw
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ int quantized_dimension = 3;
+ std::vector<float> scales{0.1, 0.2, 0.3};
+ std::vector<int32_t> zero_points{1, -1, 1};
+
+ Tensor input1_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0)); // only the first scale/zero point
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG) // inputs differ in zero point: configure() must throw
+{
+ std::vector<float> input1_data{1, 2, 3, 4};
+ std::vector<float> input2_data{5, 6, 7, 8};
+ float scale = 0.1;
+ int32_t zero_point_1 = 1;
+ int32_t zero_point_2 = -1;
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// TODO: Remove this test when concat w/ fused_activation is supported
+TEST_F(ConcatenationTest, With_Fused_Activation_NEG) // RELU activation requested: configure() must throw
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = 1;
+ params.activation = luci::FusedActFunc::RELU;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PALConv2d.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+// Returns the padding along dimension 1 (height) of `input` for the padding type,
+// vertical stride and vertical dilation stored in `options`.
+int32_t compute_padding_h(const circle::Tensor *input, const circle::Tensor *filter,
+                          const circle::Conv2DOptions *options)
+{
+  const int32_t in_h = Tensor::dim(input, 1);
+  const int32_t kernel_h = Tensor::dim(filter, 1);
+
+  // The output extent is needed first; the padding is derived from it.
+  const int32_t out_h =
+    kernels::computeOutputSize(luci_padding(options->padding()), in_h, kernel_h,
+                               options->stride_h(), options->dilation_h_factor());
+
+  return kernels::computePadding(options->stride_h(), options->dilation_h_factor(), in_h,
+                                 kernel_h, out_h);
+}
+
+// Returns the padding along dimension 2 (width) of `input` for the padding type,
+// horizontal stride and horizontal dilation stored in `options`.
+int32_t compute_padding_w(const circle::Tensor *input, const circle::Tensor *filter,
+                          const circle::Conv2DOptions *options)
+{
+  const int32_t in_w = Tensor::dim(input, 2);
+  const int32_t kernel_w = Tensor::dim(filter, 2);
+
+  // The output extent is needed first; the padding is derived from it.
+  const int32_t out_w =
+    kernels::computeOutputSize(luci_padding(options->padding()), in_w, kernel_w,
+                               options->stride_w(), options->dilation_w_factor());
+
+  return kernels::computePadding(options->stride_w(), options->dilation_w_factor(), in_w,
+                                 kernel_w, out_w);
+}
+
+#ifndef DIS_FLOAT
+
+// Float Conv2D: fills tflite::ConvParams from `options` (padding, strides, dilations and
+// the fused activation range) and passes the tensors to luci_interpreter_pal::Conv.
+void evalFloat(const circle::Tensor *input, const circle::Tensor *filter,
+               const circle::Tensor *bias, const circle::Tensor *output,
+               const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  float act_min{};
+  float act_max{};
+  kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()), &act_min,
+                                    &act_max);
+
+  tflite::ConvParams conv_params{};
+  conv_params.padding_values.height = compute_padding_h(input, filter, options);
+  conv_params.padding_values.width = compute_padding_w(input, filter, options);
+  conv_params.stride_height = options->stride_h();
+  conv_params.stride_width = options->stride_w();
+  conv_params.dilation_height_factor = options->dilation_h_factor();
+  conv_params.dilation_width_factor = options->dilation_w_factor();
+  conv_params.float_activation_min = act_min;
+  conv_params.float_activation_max = act_max;
+
+  // Input and output buffers come from the runtime graph, filter and bias from constant data.
+  auto *input_raw = runtime_graph->getDataByTensor(input);
+  auto *output_raw = runtime_graph->getDataByTensor(output);
+
+  auto *filter_raw = runtime_graph->getConstDataByTensor(filter);
+  auto *bias_raw = runtime_graph->getConstDataByTensor(bias);
+
+  // The trailing shape/pointer pair is passed as an empty shape and nullptr.
+  luci_interpreter_pal::Conv(
+    conv_params, kernels::getTensorShape(input), kernels::getTensorData<float>(input_raw),
+    kernels::getTensorShape(filter), kernels::getTensorData<float>(filter_raw),
+    kernels::getTensorShape(bias), kernels::getTensorData<float>(bias_raw),
+    kernels::getTensorShape(output), kernels::getTensorData<float>(output_raw),
+    kernels::getTensorShape(nullptr), nullptr);
+}
+
+#endif // DIS_FLOAT
+
+#ifndef DIS_QUANT
+
+// uint8 Conv2D with per-tensor quantization: derives the output multiplier/shift from the
+// input, filter and output scales, fills tflite::ConvParams and calls
+// luci_interpreter_pal::Conv.
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *filter,
+                   const circle::Tensor *bias, const circle::Tensor *output,
+                   const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  const auto in_scale = static_cast<double>(Tensor::scale(input));
+  const auto weight_scale = static_cast<double>(Tensor::scale(filter));
+  const auto out_scale = static_cast<double>(Tensor::scale(output));
+
+  // real multiplier = input_scale * filter_scale / output_scale
+  const double real_multiplier = in_scale * weight_scale / out_scale;
+  int32_t out_multiplier{};
+  int out_shift{};
+  kernels::quantizeMultiplier(real_multiplier, &out_multiplier, &out_shift);
+
+  int32_t act_min{};
+  int32_t act_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &act_min, &act_max);
+
+  tflite::ConvParams conv_params{};
+  conv_params.padding_values.height = compute_padding_h(input, filter, options);
+  conv_params.padding_values.width = compute_padding_w(input, filter, options);
+  conv_params.stride_height = options->stride_h();
+  conv_params.stride_width = options->stride_w();
+  conv_params.dilation_height_factor = options->dilation_h_factor();
+  conv_params.dilation_width_factor = options->dilation_w_factor();
+  // Input and filter zero points are passed negated; the output zero point is passed as is.
+  conv_params.input_offset = -Tensor::zero_point(input);
+  conv_params.weights_offset = -Tensor::zero_point(filter);
+  conv_params.output_offset = Tensor::zero_point(output);
+  conv_params.output_multiplier = out_multiplier;
+  conv_params.output_shift = out_shift;
+  conv_params.quantized_activation_min = act_min;
+  conv_params.quantized_activation_max = act_max;
+
+  auto *input_raw = runtime_graph->getDataByTensor(input);
+  auto *output_raw = runtime_graph->getDataByTensor(output);
+
+  auto *filter_raw = runtime_graph->getConstDataByTensor(filter);
+  auto *bias_raw = runtime_graph->getConstDataByTensor(bias);
+
+  // The trailing shape/pointer pair is passed as an empty shape and nullptr.
+  luci_interpreter_pal::Conv(
+    conv_params, kernels::getTensorShape(input), kernels::getTensorData<uint8_t>(input_raw),
+    kernels::getTensorShape(filter), kernels::getTensorData<uint8_t>(filter_raw),
+    kernels::getTensorShape(bias), kernels::getTensorData<int32_t>(bias_raw),
+    kernels::getTensorShape(output), kernels::getTensorData<uint8_t>(output_raw),
+    kernels::getTensorShape(nullptr), nullptr);
+}
+
+/*
+ * uint8 Conv2D with one quantization multiplier per output channel, implemented as explicit
+ * loops over batch, output row, output column and output channel.
+ *
+ * For every output element the products of (input - input zero point) and
+ * (filter - filter zero point of the output channel) are accumulated over the filter window,
+ * skipping taps that fall outside the input. The bias (if any) is added, the sum is rescaled
+ * with the channel's multiplier/shift, offset by the output zero point and clamped to the
+ * fused activation range.
+ *
+ * Padding and the input/output/filter zero points do not change between iterations, so they
+ * are computed once before the loops.
+ */
+void evalQuantizedPerChannel(const circle::Tensor *input, const circle::Tensor *filter,
+                             const circle::Tensor *bias, const circle::Tensor *output,
+                             const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  auto *raw_input_data = runtime_graph->getDataByTensor(input);
+  auto *raw_output_data = runtime_graph->getDataByTensor(output);
+
+  auto *raw_filter_data = runtime_graph->getConstDataByTensor(filter);
+  auto *raw_bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  const auto *input_data = kernels::getTensorData<uint8_t>(raw_input_data);
+  const auto *filter_data = kernels::getTensorData<uint8_t>(raw_filter_data);
+  const auto *bias_data = kernels::getTensorData<int32_t>(raw_bias_data);
+  auto *output_data = kernels::getTensorData<uint8_t>(raw_output_data);
+
+  const int32_t batches = Tensor::dim(input, 0);
+  const int32_t input_height = Tensor::dim(input, 1);
+  const int32_t input_width = Tensor::dim(input, 2);
+  const int32_t input_depth = Tensor::dim(input, 3);
+  const int32_t output_depth = Tensor::dim(filter, 0);
+  const int32_t filter_height = Tensor::dim(filter, 1);
+  const int32_t filter_width = Tensor::dim(filter, 2);
+  const int32_t output_height = Tensor::dim(output, 1);
+  const int32_t output_width = Tensor::dim(output, 2);
+
+  const int32_t stride_height = options->stride_h();
+  const int32_t stride_width = options->stride_w();
+  const int32_t dilation_height_factor = options->dilation_h_factor();
+  const int32_t dilation_width_factor = options->dilation_w_factor();
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &activation_min, &activation_max);
+
+  const std::vector<double> effective_output_scale = kernels::getQuantizedConvolutionMultiplers(
+    Tensor::scale(input), Tensor::scales(filter), Tensor::scale(output));
+
+  const std::vector<kernels::ChannelQuantMultipliers> multipliers_raw =
+    kernels::quantizeMultipliers(effective_output_scale);
+  kernels::BroadcastableWrapper<kernels::ChannelQuantMultipliers> quant_multipliers(
+    multipliers_raw);
+
+  // Loop-invariant values, evaluated once instead of once per output element / filter tap.
+  const int32_t padding_height = compute_padding_h(input, filter, options);
+  const int32_t padding_width = compute_padding_w(input, filter, options);
+  const auto input_zero_point = Tensor::zero_point(input);
+  const auto output_zero_point = Tensor::zero_point(output);
+  const auto filter_zero_points = Tensor::zero_points(filter);
+
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      const int32_t in_y_origin = out_y * stride_height - padding_height;
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        const int32_t in_x_origin = out_x * stride_width - padding_width;
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          int32_t acc = 0;
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+              const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+              // Taps outside the input contribute nothing.
+              if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+              {
+                for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+                {
+                  const uint8_t input_val =
+                    input_data[kernels::calcOffset(input, batch, in_y, in_x, in_c)];
+                  const uint8_t filter_val =
+                    filter_data[kernels::calcOffset(filter, out_c, filter_y, filter_x, in_c)];
+                  acc += static_cast<int32_t>(input_val - input_zero_point) *
+                         static_cast<int32_t>(filter_val - filter_zero_points[out_c]);
+                }
+              }
+            }
+          }
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
+
+          scaled_acc += output_zero_point;
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+          output_data[kernels::calcOffset(output, batch, out_y, out_x, out_c)] = scaled_acc;
+        }
+      }
+    }
+  }
+}
+
+// int8 Conv2D with per-channel quantization: fills tflite::ConvParams, converts the
+// per-channel effective scales into separate multiplier and shift arrays and calls
+// luci_interpreter_pal::ConvPerChannel.
+void evalQuantizedS8PerChannel(const circle::Tensor *input, const circle::Tensor *filter,
+                               const circle::Tensor *bias, const circle::Tensor *output,
+                               const circle::Conv2DOptions *options,
+                               BaseRuntimeGraph *runtime_graph)
+{
+  int32_t act_min{};
+  int32_t act_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &act_min, &act_max);
+
+  tflite::ConvParams conv_params{};
+  conv_params.padding_values.height = compute_padding_h(input, filter, options);
+  conv_params.padding_values.width = compute_padding_w(input, filter, options);
+  conv_params.stride_height = options->stride_h();
+  conv_params.stride_width = options->stride_w();
+  conv_params.dilation_height_factor = options->dilation_h_factor();
+  conv_params.dilation_width_factor = options->dilation_w_factor();
+  // The input zero point is passed negated; the weights offset is left at zero.
+  conv_params.input_offset = -Tensor::zero_point(input);
+  conv_params.weights_offset = 0; // Unused in tflite code
+  conv_params.output_offset = Tensor::zero_point(output);
+  conv_params.quantized_activation_min = act_min;
+  conv_params.quantized_activation_max = act_max;
+
+  const std::vector<double> effective_output_scales = kernels::getQuantizedConvolutionMultiplers(
+    Tensor::scale(input), Tensor::scales(filter), Tensor::scale(output));
+
+  std::vector<kernels::ChannelQuantMultipliers> quant_multipliers =
+    kernels::quantizeMultipliers(effective_output_scales);
+
+  // Split the (multiplier, shift) pairs into the two flat arrays taken by the PAL call.
+  std::vector<int32_t> shifts;
+  std::vector<int32_t> multipliers;
+  for (const auto &channel : quant_multipliers)
+  {
+    shifts.push_back(channel.shift);
+    multipliers.push_back(channel.multiplier);
+  }
+
+  auto *input_raw = runtime_graph->getDataByTensor(input);
+  auto *output_raw = runtime_graph->getDataByTensor(output);
+
+  auto *filter_raw = runtime_graph->getConstDataByTensor(filter);
+  auto *bias_raw = runtime_graph->getConstDataByTensor(bias);
+
+  // The trailing shape/pointer pair is passed as an empty shape and nullptr.
+  luci_interpreter_pal::ConvPerChannel(
+    conv_params, multipliers.data(), shifts.data(), kernels::getTensorShape(input),
+    kernels::getTensorData<int8_t>(input_raw), kernels::getTensorShape(filter),
+    kernels::getTensorData<int8_t>(filter_raw), kernels::getTensorShape(bias),
+    kernels::getTensorData<int32_t>(bias_raw), kernels::getTensorShape(output),
+    kernels::getTensorData<int8_t>(output_raw), kernels::getTensorShape(nullptr), nullptr);
+}
+
+// Reference (loop-based) Conv2D for signed 16-bit quantized tensors.
+//
+// The input/output buffers come from runtime_graph->getDataByTensor() and the filter/bias
+// buffers from runtime_graph->getConstDataByTensor(). input and output are addressed as
+// [batch, y, x, channel] and filter as [out_channel, y, x, in_channel]. Filter taps that land
+// outside the input plane are skipped. The 64-bit accumulator of every output channel is
+// rescaled with that channel's quantized multiplier/shift and clamped to the fused-activation
+// range before it is stored.
+//
+// A null bias buffer is allowed; no bias is added in that case.
+void evalQuantizedS16(const circle::Tensor *input, const circle::Tensor *filter,
+                      const circle::Tensor *bias, const circle::Tensor *output,
+                      const circle::Conv2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  auto *raw_input_data = runtime_graph->getDataByTensor(input);
+  auto *raw_output_data = runtime_graph->getDataByTensor(output);
+
+  auto *raw_filter_data = runtime_graph->getConstDataByTensor(filter);
+  auto *raw_bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  // View the raw buffers with the element types of an S16 convolution: 16-bit input, filter
+  // and output, and a 64-bit bias (configure_kernel_CircleConv2D requires a DataType::S64
+  // bias for S16 input). The offsets returned by calcOffset() are used as element indices,
+  // so the element type must match the tensor type.
+  const auto *input_data = kernels::getTensorData<int16_t>(raw_input_data);
+  const auto *filter_data = kernels::getTensorData<int16_t>(raw_filter_data);
+  const auto *bias_data = kernels::getTensorData<int64_t>(raw_bias_data);
+  auto *output_data = kernels::getTensorData<int16_t>(raw_output_data);
+
+  const int32_t batches = Tensor::dim(input, 0);
+  const int32_t input_height = Tensor::dim(input, 1);
+  const int32_t input_width = Tensor::dim(input, 2);
+  const int32_t input_depth = Tensor::dim(input, 3);
+  const int32_t output_depth = Tensor::dim(filter, 0);
+  const int32_t filter_height = Tensor::dim(filter, 1);
+  const int32_t filter_width = Tensor::dim(filter, 2);
+  const int32_t output_height = Tensor::dim(output, 1);
+  const int32_t output_width = Tensor::dim(output, 2);
+
+  const int32_t stride_height = options->stride_h();
+  const int32_t stride_width = options->stride_w();
+  const int32_t dilation_height_factor = options->dilation_h_factor();
+  const int32_t dilation_width_factor = options->dilation_w_factor();
+
+  // The padding depends only on the tensors and the options, never on the output position,
+  // so it is computed once instead of once per output element.
+  const auto padding_height = compute_padding_h(input, filter, options);
+  const auto padding_width = compute_padding_w(input, filter, options);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &activation_min, &activation_max);
+
+  const std::vector<double> effective_output_scale = kernels::getQuantizedConvolutionMultiplers(
+    Tensor::scale(input), Tensor::scales(filter), Tensor::scale(output));
+
+  const std::vector<kernels::ChannelQuantMultipliers> multipliers_raw =
+    kernels::quantizeMultipliers(effective_output_scale);
+  // NOTE(review): BroadcastableWrapper presumably lets a single per-tensor multiplier be
+  // indexed by any output channel - confirm against its definition.
+  kernels::BroadcastableWrapper<kernels::ChannelQuantMultipliers> multipliers(multipliers_raw);
+
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      // Top row of the input window for this output row (negative inside the padding).
+      const int32_t in_y_origin = out_y * stride_height - padding_height;
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        // Left column of the input window for this output column.
+        const int32_t in_x_origin = out_x * stride_width - padding_width;
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          int64_t acc = 0;
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+              const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+              // Taps outside the input plane add nothing to the accumulator.
+              if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+              {
+                for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+                {
+                  const int16_t input_val =
+                    input_data[kernels::calcOffset(input, batch, in_y, in_x, in_c)];
+                  const int16_t filter_val =
+                    filter_data[kernels::calcOffset(filter, out_c, filter_y, filter_x, in_c)];
+                  acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+                }
+              }
+            }
+          }
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
+
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+
+          // scaled_acc has been clamped to the activation range computed for the output.
+          output_data[kernels::calcOffset(output, batch, out_y, out_x, out_c)] =
+            static_cast<int16_t>(scaled_acc);
+        }
+      }
+    }
+  }
+}
+#endif // DIS_QUANT
+
+} // namespace
+
+// Validates a Conv2D operator before execution.
+//
+// Looks up the input (operand 0), filter (operand 1), bias (operand 2) and output tensors of
+// cur_op in runtime_graph and checks that:
+//  - input, filter and output exist and the filter has constant data;
+//  - the input/filter element types form a supported pair and the bias, when present, has
+//    the matching type (FLOAT32 -> FLOAT32, U8 -> S32, S8 -> S32, S16 -> S64);
+//  - for S8, the filter is 4-D, has one scale per output channel and only zero zero-points;
+//  - output has the same element type as input;
+//  - input and filter are 4-D and agree on the last dimension (input channels);
+//  - the bias, when present, is 1-D with one element per output channel;
+//  - the fused activation is NONE, RELU, RELU6 or RELU_N1_TO_1.
+//
+// Unsupported types and activations are reported through assert(), everything else through
+// LUCI_INTERPRETER_CHECK.
+void configure_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto filter_index = cur_op->inputs()->operator[](1);
+  const auto bias_index = cur_op->inputs()->operator[](2);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(filter_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto filter = runtime_graph->getCircleTensorByIndex(filter_index);
+  // bias_index is not asserted above, so the bias is optional; every check below accepts
+  // bias == nullptr.
+  const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(filter != nullptr);
+  // output is dereferenced by the element-type check below, so it must exist as well
+  // (execute_kernel_CircleConv2D asserts the same).
+  assert(output != nullptr);
+
+  auto filter_data = runtime_graph->getConstDataByTensor(filter);
+
+  assert(filter_data != nullptr);
+
+  const auto *options = cur_op->builtin_options_as_Conv2DOptions();
+
+  if (Tensor::element_type(input) == DataType::FLOAT32 &&
+      Tensor::element_type(filter) == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::FLOAT32);
+  }
+#ifndef DIS_QUANT
+  else if (Tensor::element_type(input) == DataType::U8 &&
+           Tensor::element_type(filter) == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S32);
+  }
+  else if (Tensor::element_type(input) == DataType::S8 &&
+           Tensor::element_type(filter) == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S32);
+    // Checked here (and again below) because dim(filter, 0) is read right away.
+    LUCI_INTERPRETER_CHECK(Tensor::num_dims(filter) == 4);
+    // One scale per output channel, and every filter zero point must be 0.
+    LUCI_INTERPRETER_CHECK(Tensor::scales(filter).size() ==
+                           static_cast<size_t>(Tensor::dim(filter, 0)));
+    for (auto zerop : Tensor::zero_points(filter))
+    {
+      LUCI_INTERPRETER_CHECK(zerop == 0);
+    }
+  }
+  else if (Tensor::element_type(input) == DataType::S16 &&
+           Tensor::element_type(filter) == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::element_type(bias) == DataType::S64);
+  }
+#endif // DIS_QUANT
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+  LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == Tensor::element_type(input));
+  LUCI_INTERPRETER_CHECK(Tensor::num_dims(input) == 4 && Tensor::num_dims(filter) == 4);
+
+  // Filter dimension 0 is the number of output channels; dimension 3 must match the last
+  // dimension of the input.
+  const int32_t output_depth = Tensor::dim(filter, 0);
+  LUCI_INTERPRETER_CHECK(Tensor::dim(filter, 3) == Tensor::dim(input, 3));
+
+  LUCI_INTERPRETER_CHECK(bias == nullptr ||
+                         (Tensor::num_dims(bias) == 1 && Tensor::dim(bias, 0) == output_depth));
+
+  switch (options->fused_activation_function())
+  {
+    case circle::ActivationFunctionType_NONE:
+    case circle::ActivationFunctionType_RELU:
+    case circle::ActivationFunctionType_RELU6:
+    case circle::ActivationFunctionType_RELU_N1_TO_1:
+      break;
+    default:
+      assert(false && "Unsupported fused activation");
+  }
+}
+
+// Runs a Conv2D operator.
+//
+// Looks up the input (operand 0), weights (operand 1), bias (operand 2) and output tensors of
+// cur_op in runtime_graph and dispatches on the input element type:
+//  - FLOAT32 input with FLOAT32 weights -> evalFloat
+//  - U8 with one weight scale           -> evalQuantized
+//  - U8 with several weight scales      -> evalQuantizedPerChannel
+//  - S8                                 -> evalQuantizedS8PerChannel
+//  - S16                                -> evalQuantizedS16
+// Any other combination trips the "Unsupported type." assertion. The unnamed bool parameter
+// is not used.
+void execute_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+                                 bool)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto weight_index = cur_op->inputs()->operator[](1);
+  const auto bias_index = cur_op->inputs()->operator[](2);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(weight_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto weights = runtime_graph->getCircleTensorByIndex(weight_index);
+  // bias_index is not asserted above; the bias is optional.
+  const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(weights != nullptr);
+  assert(output != nullptr);
+
+  const auto *options = cur_op->builtin_options_as_Conv2DOptions();
+
+  switch (Tensor::element_type(input))
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      if (Tensor::element_type(weights) == DataType::FLOAT32)
+      {
+        evalFloat(input, weights, bias, output, options, runtime_graph);
+      }
+      else
+      {
+        // Float input with non-float weights is not supported. Reject it here instead of
+        // falling through into the U8 case, which would run a quantized kernel on float
+        // data.
+        assert(false && "Unsupported type.");
+      }
+      break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::U8:
+      // The number of weight scales selects per-tensor or per-channel quantization. When
+      // the weights carry no scale at all, neither branch runs and nothing is executed.
+      if (Tensor::scales(weights).size() == 1)
+      {
+        evalQuantized(input, weights, bias, output, options, runtime_graph);
+      }
+      else if (Tensor::scales(weights).size() > 1)
+      {
+        LUCI_INTERPRETER_CHECK(Tensor::num_dims(weights) == 4);
+        LUCI_INTERPRETER_CHECK(Tensor::scales(weights).size() ==
+                               static_cast<size_t>(Tensor::dim(weights, 0)));
+        evalQuantizedPerChannel(input, weights, bias, output, options, runtime_graph);
+      }
+      break;
+    case DataType::S8:
+      evalQuantizedS8PerChannel(input, weights, bias, output, options, runtime_graph);
+      break;
+    case DataType::S16:
+      evalQuantizedS16(input, weights, bias, output, options, runtime_graph);
+      break;
+#endif // DIS_QUANT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO enable it
+#if 0
+#include "kernels/Conv2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture shared by all Conv2D tests: each test starts with a fresh TestMemoryManager.
+class Conv2DTest : public ::testing::Test
+{
+protected:
+  // Memory manager handed to the tensor helpers and used to allocate kernel outputs.
+  std::unique_ptr<IMemoryManager> _memory_manager;
+
+  void SetUp() override
+  {
+    _memory_manager = std::make_unique<TestMemoryManager>();
+  }
+};
+
+// FLOAT32 convolution with a 2x2 filter, VALID padding, stride 2 (height) x 1 (width) and a
+// fused RELU; checks both the produced values and the output shape.
+TEST_F(Conv2DTest, Float)
+{
+  // Kernel attributes.
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::RELU;
+  conv_params.padding = Padding::VALID;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.stride_height = 2;
+  conv_params.stride_width = 1;
+
+  // Expected result.
+  std::vector<int32_t> expected_shape{1, 2, 2, 2};
+  std::vector<float> expected_values{
+    11, 16, 7, 20, // row = 0
+    0,  40, 0, 44, // row = 1
+  };
+
+  // Operand shapes and contents.
+  Shape in_shape{1, 4, 3, 2};
+  Shape weights_shape{2, 2, 2, 2};
+  Shape b_shape{2};
+  std::vector<float> in_values{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> weights_values{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> b_values{1, 2};
+
+  Tensor in_tensor =
+    makeInputTensor<DataType::FLOAT32>(in_shape, in_values, _memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_values, _memory_manager.get());
+  Tensor b_tensor = makeInputTensor<DataType::FLOAT32>(b_shape, b_values, _memory_manager.get());
+  Tensor im2col_tensor(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D conv2d(&in_tensor, &weights_tensor, &b_tensor, &out_tensor, &im2col_tensor,
+                conv_params);
+  conv2d.configure();
+  _memory_manager->allocate_memory(im2col_tensor);
+  _memory_manager->allocate_memory(out_tensor);
+  conv2d.execute();
+
+  EXPECT_THAT(extractTensorData<float>(out_tensor), FloatArrayNear(expected_values));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+// FLOAT32 convolution with a 1x1 (pointwise) filter, VALID padding, unit strides and a fused
+// RELU; checks both the produced values and the output shape.
+TEST_F(Conv2DTest, FloatPointwise)
+{
+  // Kernel attributes.
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::RELU;
+  conv_params.padding = Padding::VALID;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.stride_height = 1;
+  conv_params.stride_width = 1;
+
+  // Expected result.
+  std::vector<int32_t> expected_shape{1, 2, 2, 2};
+  std::vector<float> expected_values{
+    4, 7,  6,  9,  // row = 0
+    8, 11, 10, 13, // row = 1
+  };
+
+  // Operand shapes and contents.
+  Shape in_shape{1, 2, 2, 2};
+  Shape weights_shape{2, 1, 1, 2};
+  Shape b_shape{2};
+  std::vector<float> in_values{
+    1, 2, // row = 0, col = 0
+    3, 4, // row = 0, col = 1
+    5, 6, // row = 1, col = 0
+    7, 8, // row = 1, col = 1
+  };
+  std::vector<float> weights_values{
+    -1, 2, // out = 0
+    -3, 4, // out = 1
+  };
+  std::vector<float> b_values{1, 2};
+
+  Tensor in_tensor =
+    makeInputTensor<DataType::FLOAT32>(in_shape, in_values, _memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_values, _memory_manager.get());
+  Tensor b_tensor = makeInputTensor<DataType::FLOAT32>(b_shape, b_values, _memory_manager.get());
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor im2col_tensor(DataType::FLOAT32, Shape({}), {}, "");
+
+  Conv2D conv2d(&in_tensor, &weights_tensor, &b_tensor, &out_tensor, &im2col_tensor,
+                conv_params);
+  conv2d.configure();
+  _memory_manager->allocate_memory(im2col_tensor);
+  _memory_manager->allocate_memory(out_tensor);
+  conv2d.execute();
+
+  EXPECT_THAT(extractTensorData<float>(out_tensor), FloatArrayNear(expected_values));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+// FLOAT32 convolution over two batches with three 2x2 filters, VALID padding, stride 2x2 and
+// no fused activation; checks both the produced values and the output shape.
+TEST_F(Conv2DTest, FloatCheck)
+{
+  // Kernel attributes.
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::NONE;
+  conv_params.padding = Padding::VALID;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.stride_height = 2;
+  conv_params.stride_width = 2;
+
+  // Expected result.
+  std::vector<int32_t> expected_shape{2, 1, 2, 3};
+  std::vector<float> expected_values{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+
+  // Operand shapes and contents.
+  Shape in_shape{2, 2, 4, 1};
+  Shape weights_shape{3, 2, 2, 1};
+  Shape b_shape{3};
+  std::vector<float> in_values{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> weights_values{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> b_values{1, 2, 3};
+
+  Tensor in_tensor =
+    makeInputTensor<DataType::FLOAT32>(in_shape, in_values, _memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_values, _memory_manager.get());
+  Tensor b_tensor = makeInputTensor<DataType::FLOAT32>(b_shape, b_values, _memory_manager.get());
+  Tensor im2col_tensor(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D conv2d(&in_tensor, &weights_tensor, &b_tensor, &out_tensor, &im2col_tensor,
+                conv_params);
+  conv2d.configure();
+  _memory_manager->allocate_memory(out_tensor);
+  _memory_manager->allocate_memory(im2col_tensor);
+  conv2d.execute();
+
+  EXPECT_THAT(extractTensorData<float>(out_tensor), FloatArrayNear(expected_values));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+// U8 convolution with per-tensor quantization: input and filter share one set of
+// quantization parameters, the bias is S32. The dequantized output is compared against the
+// float reference values.
+TEST_F(Conv2DTest, Uint8)
+{
+  // Kernel attributes.
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::NONE;
+  conv_params.padding = Padding::VALID;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.stride_height = 2;
+  conv_params.stride_width = 2;
+
+  // Expected result.
+  std::vector<int32_t> expected_shape{2, 1, 2, 3};
+  std::vector<float> expected_values{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+
+  // Operand contents (float values, quantized by the tensor helpers).
+  std::vector<float> in_values{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> weights_values{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> b_values{1, 2, 3};
+
+  // (scale, zero point) pairs.
+  std::pair<float, int32_t> in_qparam = quantizationParams<uint8_t>(-63.5, 64);
+  std::pair<float, int32_t> out_qparam = quantizationParams<uint8_t>(-127, 128);
+
+  Tensor in_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, in_qparam.first,
+                                                   in_qparam.second, in_values,
+                                                   _memory_manager.get());
+  Tensor weights_tensor = makeInputTensor<DataType::U8>({3, 2, 2, 1}, in_qparam.first,
+                                                        in_qparam.second, weights_values,
+                                                        _memory_manager.get());
+  // Bias scale is the product of the input and filter scales (both in_qparam.first here).
+  Tensor b_tensor = makeInputTensor<DataType::S32>({3}, in_qparam.first * in_qparam.first, 0,
+                                                   b_values, _memory_manager.get());
+  Tensor im2col_tensor(DataType::U8, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::U8, out_qparam.first, out_qparam.second);
+
+  Conv2D conv2d(&in_tensor, &weights_tensor, &b_tensor, &out_tensor, &im2col_tensor,
+                conv_params);
+  conv2d.configure();
+  _memory_manager->allocate_memory(out_tensor);
+  _memory_manager->allocate_memory(im2col_tensor);
+  conv2d.execute();
+
+  EXPECT_THAT(dequantizeTensorData(out_tensor), FloatArrayNear(expected_values));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+// U8 convolution with channel-wise quantized weights: every output channel of the filter has
+// its own (scale, zero point) pair and the bias scale of a channel is filter scale * input
+// scale. The dequantized output is compared against the float reference values.
+TEST_F(Conv2DTest, Uint8_CWQ)
+{
+  const int output_channels = 3;
+
+  // Kernel attributes.
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::NONE;
+  conv_params.padding = Padding::VALID;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.stride_height = 2;
+  conv_params.stride_width = 2;
+
+  // Expected result.
+  std::vector<int32_t> expected_shape{2, 1, 2, 3};
+  std::vector<float> expected_values{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+
+  // Operand contents (float values, quantized by the tensor helpers).
+  std::vector<float> in_values{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> weights_values{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> b_values{1, 2, 3};
+  Shape weights_shape{output_channels, 2, 2, 1};
+
+  // (scale, zero point) pairs.
+  std::pair<float, int32_t> in_qparam = quantizationParams<uint8_t>(0, 4);
+  std::pair<float, int32_t> out_qparam = quantizationParams<uint8_t>(-127, 128);
+
+  // One (scale, zero point) pair per output channel of the filter.
+  std::vector<std::pair<float, int32_t>> weights_qparams{
+    quantizationParams<uint8_t>(0, 4),
+    quantizationParams<uint8_t>(-1, 1),
+    quantizationParams<uint8_t>(-1, 1),
+  };
+
+  // Split the pairs into parallel vectors and derive the per-channel bias scales.
+  std::vector<float> weights_scales;
+  std::vector<int32_t> weights_zerops;
+  std::vector<float> b_scales;
+  for (const auto &channel_qparam : weights_qparams)
+  {
+    weights_scales.push_back(channel_qparam.first);
+    weights_zerops.push_back(channel_qparam.second);
+    b_scales.push_back(channel_qparam.first * in_qparam.first);
+  }
+  std::vector<int32_t> b_zerops(output_channels, 0);
+
+  Tensor in_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, in_qparam.first,
+                                                   in_qparam.second, in_values,
+                                                   _memory_manager.get());
+  Tensor weights_tensor = makeInputTensor<DataType::U8>(
+    weights_shape, weights_scales, weights_zerops, 0, weights_values, _memory_manager.get());
+  Tensor b_tensor = makeInputTensor<DataType::S32>({output_channels}, b_scales, b_zerops, 0,
+                                                   b_values, _memory_manager.get());
+  Tensor im2col_tensor(DataType::U8, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::U8, out_qparam.first, out_qparam.second);
+
+  Conv2D conv2d(&in_tensor, &weights_tensor, &b_tensor, &out_tensor, &im2col_tensor,
+                conv_params);
+  conv2d.configure();
+  _memory_manager->allocate_memory(out_tensor);
+  _memory_manager->allocate_memory(im2col_tensor);
+  conv2d.execute();
+
+  EXPECT_THAT(dequantizeTensorData(out_tensor), FloatArrayNear(expected_values));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+// S8 convolution with channel-wise quantized weights: every output channel of the filter has
+// its own scale with a zero point of 0, and the bias scale of a channel is filter scale *
+// input scale. The dequantized output is compared against the float reference values.
+TEST_F(Conv2DTest, SInt8_CWQ)
+{
+  const int output_channels = 3;
+
+  // Kernel attributes.
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::NONE;
+  conv_params.padding = Padding::VALID;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.stride_height = 2;
+  conv_params.stride_width = 2;
+
+  // Expected result.
+  std::vector<int32_t> expected_shape{2, 1, 2, 3};
+  std::vector<float> expected_values{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+
+  // Operand contents (float values, quantized by the tensor helpers).
+  std::vector<float> in_values{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> weights_values{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> b_values{1, 2, 3};
+  Shape weights_shape{output_channels, 2, 2, 1};
+
+  // (scale, zero point) pairs.
+  std::pair<float, int32_t> in_qparam = quantizationParams<int8_t>(0, 4);
+  std::pair<float, int32_t> out_qparam = quantizationParams<int8_t>(-127, 128);
+
+  // One (scale, zero point) pair per output channel of the filter.
+  std::vector<std::pair<float, int32_t>> weights_qparams{
+    std::pair<float, int32_t>(0.5, 0),
+    std::pair<float, int32_t>(0.25, 0),
+    std::pair<float, int32_t>(0.125, 0),
+  };
+
+  // Split the pairs into parallel vectors and derive the per-channel bias scales.
+  std::vector<float> weights_scales;
+  std::vector<int32_t> weights_zerops;
+  std::vector<float> b_scales;
+  for (const auto &channel_qparam : weights_qparams)
+  {
+    weights_scales.push_back(channel_qparam.first);
+    weights_zerops.push_back(channel_qparam.second);
+    b_scales.push_back(channel_qparam.first * in_qparam.first);
+  }
+  std::vector<int32_t> b_zerops(output_channels, 0);
+
+  Tensor in_tensor = makeInputTensor<DataType::S8>({2, 2, 4, 1}, in_qparam.first,
+                                                   in_qparam.second, in_values,
+                                                   _memory_manager.get());
+  Tensor weights_tensor = makeInputTensor<DataType::S8>(
+    weights_shape, weights_scales, weights_zerops, 0, weights_values, _memory_manager.get());
+  Tensor b_tensor = makeInputTensor<DataType::S32>({output_channels}, b_scales, b_zerops, 0,
+                                                   b_values, _memory_manager.get());
+  Tensor im2col_tensor(DataType::S8, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::S8, out_qparam.first, out_qparam.second);
+
+  Conv2D conv2d(&in_tensor, &weights_tensor, &b_tensor, &out_tensor, &im2col_tensor,
+                conv_params);
+  conv2d.configure();
+  _memory_manager->allocate_memory(out_tensor);
+  _memory_manager->allocate_memory(im2col_tensor);
+  conv2d.execute();
+
+  EXPECT_THAT(dequantizeTensorData(out_tensor), FloatArrayNear(expected_values));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+// S16 convolution with per-tensor quantization (input scale 0.25, filter scale 0.2, output
+// scale 0.5, all zero points 0) and an S64 bias; same data and attributes as the Float test.
+TEST_F(Conv2DTest, SInt16)
+{
+  // Kernel attributes.
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::RELU;
+  conv_params.padding = Padding::VALID;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.stride_height = 2;
+  conv_params.stride_width = 1;
+
+  // Operand and result shapes.
+  Shape in_shape{1, 4, 3, 2};
+  Shape weights_shape{2, 2, 2, 2};
+  Shape b_shape{2};
+  std::vector<int32_t> expected_shape{1, 2, 2, 2};
+
+  // Operand contents (float values, quantized by the tensor helpers) and expected output.
+  std::vector<float> in_values{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> weights_values{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> b_values{1, 2};
+  std::vector<float> expected_values{
+    11, 16, 7, 20, // row = 0
+    0,  40, 0, 44, // row = 1
+  };
+
+  Tensor in_tensor =
+    makeInputTensor<DataType::S16>(in_shape, 0.25, 0, in_values, _memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::S16>(weights_shape, 0.2, 0, weights_values, _memory_manager.get());
+  // Bias scale is input scale * filter scale.
+  Tensor b_tensor =
+    makeInputTensor<DataType::S64>(b_shape, 0.25 * 0.2, 0, b_values, _memory_manager.get());
+  Tensor im2col_tensor(DataType::S16, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+  Conv2D conv2d(&in_tensor, &weights_tensor, &b_tensor, &out_tensor, &im2col_tensor,
+                conv_params);
+  conv2d.configure();
+  _memory_manager->allocate_memory(out_tensor);
+  _memory_manager->allocate_memory(im2col_tensor);
+  conv2d.execute();
+
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+  EXPECT_THAT(dequantizeTensorData(out_tensor), FloatArrayNear(expected_values));
+}
+
+// S16 convolution with channel-wise quantized weights: a 1x1 filter with three output
+// channels, each with its own scale, and an S64 bias whose per-channel scale is
+// filter scale * input scale. Checks the output shape and the dequantized output values.
+TEST_F(Conv2DTest, SInt16_CWQ_weights)
+{
+  Shape input_shape{1, 2, 2, 2};  // Batch x H x W x C
+  Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels
+  Shape bias_shape{3};
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 3};
+
+  std::vector<float> input_data{
+    1, 2, // row = 0, col 0
+    3, 4, // row = 0, col 1
+    5, 6, // row = 1, col 0
+    7, 8, // row = 1, col 1
+  };
+  std::vector<float> filter_data{
+    4, -3, // out = 0
+    1, -3, // out = 1
+    5, -3, // out = 2
+  };
+  std::vector<float> bias_data{1, 10, 5};
+  std::vector<float> ref_output_data{
+    0, 5, 4,  // row 0, col 0
+    1, 1, 8,  // row 0, col 1
+    3, 0, 12, // row 1, col 0
+    5, 0, 16, // row 1, col 1
+  };
+
+  float input_scale = 0.25f;
+  float output_scale = 0.05f;
+  std::vector<float> filter_scales = {0.25f, 0.2f, 0.1f};
+  // One bias scale per output channel. Iterating over the scales directly avoids comparing a
+  // signed index with the unsigned vector size.
+  std::vector<float> bias_scales;
+  for (const float filter_scale : filter_scales)
+    bias_scales.push_back(filter_scale * input_scale);
+  std::vector<int32_t> zerop = {0, 0, 0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+                                                        filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+                                                      _memory_manager.get());
+  Tensor im2col(DataType::S16, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 1;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+// Negative test: an S32 input combined with FLOAT32 filter/bias/output must make configure()
+// throw.
+TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG)
+{
+  // Kernel attributes.
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::RELU;
+  conv_params.padding = Padding::VALID;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.stride_height = 2;
+  conv_params.stride_width = 1;
+
+  // Operand shapes and contents; note the integer input data.
+  Shape in_shape{1, 4, 3, 2};
+  Shape weights_shape{2, 2, 2, 2};
+  Shape b_shape{2};
+  std::vector<int32_t> in_values{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> weights_values{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> b_values{1, 2};
+
+  Tensor in_tensor = makeInputTensor<DataType::S32>(in_shape, in_values, _memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_values, _memory_manager.get());
+  Tensor b_tensor = makeInputTensor<DataType::FLOAT32>(b_shape, b_values, _memory_manager.get());
+  Tensor im2col_tensor(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D conv2d(&in_tensor, &weights_tensor, &b_tensor, &out_tensor, &im2col_tensor,
+                conv_params);
+  EXPECT_ANY_THROW(conv2d.configure());
+}
+
+// Negative test: a U8 bias combined with FLOAT32 input/filter/output must make configure()
+// throw.
+TEST_F(Conv2DTest, Invalid_Bias_Type_NEG)
+{
+  // Kernel attributes.
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::RELU;
+  conv_params.padding = Padding::VALID;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.stride_height = 2;
+  conv_params.stride_width = 1;
+
+  // Operand shapes and contents; note the uint8_t bias data.
+  Shape in_shape{1, 4, 3, 2};
+  Shape weights_shape{2, 2, 2, 2};
+  Shape b_shape{2};
+  std::vector<float> in_values{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> weights_values{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<uint8_t> b_values{1, 2};
+
+  Tensor in_tensor =
+    makeInputTensor<DataType::FLOAT32>(in_shape, in_values, _memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_values, _memory_manager.get());
+  Tensor b_tensor = makeInputTensor<DataType::U8>(b_shape, b_values, _memory_manager.get());
+  Tensor im2col_tensor(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D conv2d(&in_tensor, &weights_tensor, &b_tensor, &out_tensor, &im2col_tensor,
+                conv_params);
+  EXPECT_ANY_THROW(conv2d.configure());
+}
+
+// Negative test: the bias has 3 elements while the filter has 2 output channels, so
+// configure() must throw.
+TEST_F(Conv2DTest, Invalid_Bias_Data_NEG)
+{
+  // Kernel attributes.
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::RELU;
+  conv_params.padding = Padding::VALID;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.stride_height = 2;
+  conv_params.stride_width = 1;
+
+  // Operand shapes and contents; note the 3-element bias.
+  Shape in_shape{1, 4, 3, 2};
+  Shape weights_shape{2, 2, 2, 2};
+  Shape b_shape{3};
+  std::vector<float> in_values{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> weights_values{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> b_values{1, 2, 3};
+
+  Tensor in_tensor =
+    makeInputTensor<DataType::FLOAT32>(in_shape, in_values, _memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_values, _memory_manager.get());
+  Tensor b_tensor = makeInputTensor<DataType::FLOAT32>(b_shape, b_values, _memory_manager.get());
+  Tensor im2col_tensor(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D conv2d(&in_tensor, &weights_tensor, &b_tensor, &out_tensor, &im2col_tensor,
+                conv_params);
+  EXPECT_ANY_THROW(conv2d.configure());
+}
+
+// Negative test: the input has 1 channel (shape {1, 4, 6, 1}) while the filter expects 2
+// input channels, so configure() must throw.
+TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
+{
+  // Kernel attributes.
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::RELU;
+  conv_params.padding = Padding::VALID;
+  conv_params.dilation_height_factor = 1;
+  conv_params.dilation_width_factor = 1;
+  conv_params.stride_height = 2;
+  conv_params.stride_width = 1;
+
+  // Operand shapes and contents; note the single-channel input shape.
+  Shape in_shape{1, 4, 6, 1};
+  Shape weights_shape{2, 2, 2, 2};
+  Shape b_shape{2};
+  std::vector<float> in_values{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> weights_values{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> b_values{1, 2};
+
+  Tensor in_tensor =
+    makeInputTensor<DataType::FLOAT32>(in_shape, in_values, _memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_values, _memory_manager.get());
+  Tensor b_tensor = makeInputTensor<DataType::FLOAT32>(b_shape, b_values, _memory_manager.get());
+  Tensor im2col_tensor(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D conv2d(&in_tensor, &weights_tensor, &b_tensor, &out_tensor, &im2col_tensor,
+                conv_params);
+  EXPECT_ANY_THROW(conv2d.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG)
+{
+  // Negative test: Conv2D::configure() is expected to throw when the fused activation is TANH.
+  // NOTE(review): presumably TANH is not an accepted fused activation for this kernel -
+  // confirm against Conv2D::configure().
+  Shape in_shape{1, 4, 3, 2};
+  Shape weights_shape{2, 2, 2, 2};
+  Shape offsets_shape{2};
+
+  std::vector<float> in_values{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> weights_values{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> offsets_values{1, 2};
+
+  Tensor in_tensor =
+    makeInputTensor<DataType::FLOAT32>(in_shape, in_values, _memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_values, _memory_manager.get());
+  Tensor offsets_tensor =
+    makeInputTensor<DataType::FLOAT32>(offsets_shape, offsets_values, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams conv_params{};
+  conv_params.activation = Activation::TANH;
+  conv_params.padding = Padding::VALID;
+  conv_params.stride_width = 1;
+  conv_params.stride_height = 2;
+  conv_params.dilation_width_factor = 1;
+  conv_params.dilation_height_factor = 1;
+
+  Conv2D kernel(&in_tensor, &weights_tensor, &offsets_tensor, &out_tensor, &im2col, conv_params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpace.h"
+#include "Utils.h"
+#include "PALDepthToSpace.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `input` as the only input tensor and `output` as the only output tensor, and
+// forwards `params` to the KernelWithParams base.
+DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
+  : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+{
+}
+
+// Validates the input/output tensors and the block size, then resizes the output to
+// [batch, height * block_size, width * block_size, channels / (block_size * block_size)].
+void DepthToSpace::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+                         output()->element_type() == DataType::U8);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  const int block_size = params().block_size;
+  // block_size is used as a divisor below, so reject non-positive values before dividing.
+  LUCI_INTERPRETER_CHECK(block_size > 0);
+  const int32_t input_height = input()->shape().dim(1);
+  const int32_t input_width = input()->shape().dim(2);
+  const int32_t input_channels = input()->shape().dim(3);
+  int32_t output_height = input_height * block_size;
+  int32_t output_width = input_width * block_size;
+  int32_t output_channels = input_channels / block_size / block_size;
+
+  LUCI_INTERPRETER_CHECK(input_height == output_height / block_size);
+  LUCI_INTERPRETER_CHECK(input_width == output_width / block_size);
+  // Fails when input_channels is not a multiple of block_size * block_size, because the
+  // integer division above would then have discarded a remainder.
+  LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size);
+
+  Shape output_shape(4);
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = output_height;
+  output_shape.dim(2) = output_width;
+  output_shape.dim(3) = output_channels;
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(output_shape);
+}
+
+// Runs the PAL DepthToSpace implementation matching the input element type
+// (FLOAT32 or U8); any other type only trips the assertion.
+void DepthToSpace::execute() const
+{
+  tflite::DepthToSpaceParams d2s_params;
+  d2s_params.block_size = params().block_size;
+
+  const auto element_type = input()->element_type();
+  if (element_type == DataType::FLOAT32)
+  {
+    luci_interpreter_pal::DepthToSpace(d2s_params, getTensorShape(input()),
+                                       getTensorData<float>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+  }
+  else if (element_type == DataType::U8)
+  {
+    luci_interpreter_pal::DepthToSpace(d2s_params, getTensorShape(input()),
+                                       getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                       getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    assert(false && "Unsupported Type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALDepthwiseConv2d.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers {input, filter, bias} as input tensors and {output, scratchpad} as output
+// tensors, and forwards `params` to the KernelWithParams base.
+DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
+                                 Tensor *output, Tensor *scratchpad,
+                                 const DepthwiseConv2DParams &params)
+  : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
+{
+}
+
+// Validates the input/filter/bias/output type combination and the tensor shapes, computes
+// the output spatial size and the paddings, resizes the output tensor and sets up the
+// scratchpad tensor (second output tensor).
+void DepthwiseConv2D::configure()
+{
+ // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
+ // | input filter bias output |
+ // ----+---------------------------+
+ // (1) | float float float float |
+ // (2) | float int8 float float | hybrid
+ // (3) | uint8 uint8 int32 uint8 | quantized
+ // (4) | int8 int8 int32 int8 | quantized per channel
+ // (5) | int16 int8 int64 int16 | quantized per channel 16x8
+ //
+ // We only support (1), (3) and (4) for now, and additionally the following:
+ // | input filter bias output |
+ // ----+---------------------------+
+ // (6) | int16 int16 int64 int16 |
+ //
+ // In every supported branch the bias is optional (nullptr is accepted).
+ if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+ }
+ else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ }
+ else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+ {
+ // int8 path: one scale per filter dim(3) entry and every filter zero point equal to 0.
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) ==
+ filter()->scales().size());
+ for (auto zerop : filter()->zero_points())
+ {
+ LUCI_INTERPRETER_CHECK(zerop == 0);
+ }
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ }
+ else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
+ }
+ else
+ {
+ // NOTE(review): assert() is compiled out when NDEBUG is defined, so in such builds an
+ // unsupported type combination continues past this point.
+ assert(false && "Unsupported type.");
+ }
+ LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ // Filter format: [1, H, W, O].
+ LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t channels_out = filter_shape.dim(3);
+
+ // When present, the bias must be 1-D with one element per output channel.
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+ bias()->shape().dim(0) == channels_out));
+
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+ _params.dilation_height_factor);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+ _params.dilation_width_factor);
+
+ // The paddings are cached in members and read back by the eval* methods.
+ _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
+ input_height, filter_height, output_height);
+ _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
+ filter_width, output_width);
+
+ // TODO: enable it only if kernel with dynamic shapes
+ output()->resize({batches, output_height, output_width, channels_out});
+
+ // Only the dilation factors are filled in before the params are handed to the
+ // scratchpad setup; every other field keeps its value-initialized state.
+ tflite::DepthwiseParams params{};
+
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
+ getTensorShape(input()), getTensorShape(filter()),
+ getTensorShape(output()));
+}
+
+// Dispatches to the evaluation routine matching the input element type (and, for U8, the
+// number of filter scales).
+void DepthwiseConv2D::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      if (filter()->element_type() == DataType::FLOAT32)
+      {
+        evalFloat();
+      }
+      else
+      {
+        assert(false && "Unsupported type.");
+      }
+      // Always leave the FLOAT32 case here: when assert() is compiled out (NDEBUG) the
+      // unsupported-filter path must not fall through into the U8 branch below.
+      break;
+    case DataType::U8:
+      // A single filter scale selects the per-tensor path, several scales the
+      // per-channel path.
+      // NOTE(review): with an empty scales() list neither branch runs and the output is
+      // left untouched - confirm this is intended.
+      if (filter()->scales().size() == 1)
+      {
+        evalQuantized();
+      }
+      else if (filter()->scales().size() > 1)
+      {
+        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+        LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+                               static_cast<size_t>(filter()->shape().dim(3)));
+        evalQuantizedPerChannel();
+      }
+      break;
+    case DataType::S8:
+      evalQuantizedS8PerChannel();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+// Float path: fills tflite::DepthwiseParams from the kernel parameters and the paddings
+// cached in members, then runs the TFLite reference DepthwiseConv.
+void DepthwiseConv2D::evalFloat() const
+{
+  float act_min{};
+  float act_max{};
+  calculateActivationRange(_params.activation, &act_min, &act_max);
+
+  tflite::DepthwiseParams op_params{};
+  op_params.float_activation_min = act_min;
+  op_params.float_activation_max = act_max;
+  op_params.depth_multiplier = _params.depth_multiplier;
+  op_params.stride_width = _params.stride_width;
+  op_params.stride_height = _params.stride_height;
+  op_params.dilation_width_factor = _params.dilation_width_factor;
+  op_params.dilation_height_factor = _params.dilation_height_factor;
+  op_params.padding_values.width = _padding_width;
+  op_params.padding_values.height = _padding_height;
+
+  tflite::reference_ops::DepthwiseConv(op_params,                       //
+                                       getTensorShape(input()),         //
+                                       getTensorData<float>(input()),   //
+                                       getTensorShape(filter()),        //
+                                       getTensorData<float>(filter()),  //
+                                       getTensorShape(bias()),          //
+                                       getTensorData<float>(bias()),    //
+                                       getTensorShape(output()),        //
+                                       getTensorData<float>(output()));
+}
+
+// uint8 path with per-channel filter quantization: a hand-written depthwise convolution
+// that accumulates in int32, rescales each output channel with its own multiplier/shift,
+// adds the output zero point and clamps to the activation range.
+void DepthwiseConv2D::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+ const int32_t depth_multiplier = _params.depth_multiplier;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+ quantizeMultipliers(effective_output_scales);
+ // NOTE(review): BroadcastableWrapper presumably lets a single-element list be indexed by
+ // any channel - confirm against its definition.
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
+
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ for (int m = 0; m < depth_multiplier; ++m)
+ {
+ // Each input channel produces depth_multiplier consecutive output channels.
+ const int output_channel = m + in_channel * depth_multiplier;
+ // Top-left input coordinate covered by this output position (may be negative
+ // because of padding).
+ const int in_x_origin = (out_x * stride_width) - _padding_width;
+ const int in_y_origin = (out_y * stride_height) - _padding_height;
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+ if (is_point_inside_image)
+ {
+ int32_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
+ int32_t filter_val =
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
+ // Subtract the per-channel filter zero point and the input zero point before
+ // multiplying.
+ acc += (filter_val - filter()->zero_points()[output_channel]) *
+ (input_val - input()->zero_point());
+ }
+ }
+ }
+ // Bias is optional; bias_data is only dereferenced when it is non-null.
+ if (bias_data)
+ {
+ acc += bias_data[output_channel];
+ }
+ // Requantize the accumulator with this channel's multiplier and shift, add the
+ // output zero point, then clamp to the activation range.
+ int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
+ int output_shift = quant_multipliers[output_channel].shift;
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+ output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
+ static_cast<uint8_t>(scaled_acc);
+ }
+ }
+ }
+ }
+ }
+}
+
+// uint8 per-tensor path: derives one output multiplier/shift from the input, filter and
+// output scales and runs the TFLite reference DepthwiseConv.
+void DepthwiseConv2D::evalQuantized() const
+{
+  // Clamp range for the fused activation.
+  int32_t act_min{};
+  int32_t act_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &act_min, &act_max);
+
+  // Real-valued rescale factor: input_scale * filter_scale / output_scale, in double.
+  const double in_scale = static_cast<double>(input()->scale());
+  const double weights_scale = static_cast<double>(filter()->scale());
+  const double out_scale = static_cast<double>(output()->scale());
+  int32_t multiplier{};
+  int shift{};
+  quantizeMultiplier(in_scale * weights_scale / out_scale, &multiplier, &shift);
+
+  tflite::DepthwiseParams op_params{};
+  op_params.quantized_activation_min = act_min;
+  op_params.quantized_activation_max = act_max;
+  op_params.output_multiplier = multiplier;
+  op_params.output_shift = shift;
+  // The kernel expects input and filter zero points to be negated.
+  op_params.input_offset = -input()->zero_point();    // Note the '-'.
+  op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
+  op_params.output_offset = output()->zero_point();
+  op_params.depth_multiplier = _params.depth_multiplier;
+  op_params.stride_width = _params.stride_width;
+  op_params.stride_height = _params.stride_height;
+  op_params.dilation_width_factor = _params.dilation_width_factor;
+  op_params.dilation_height_factor = _params.dilation_height_factor;
+  op_params.padding_values.width = _padding_width;
+  op_params.padding_values.height = _padding_height;
+
+  tflite::reference_ops::DepthwiseConv(op_params,                         //
+                                       getTensorShape(input()),           //
+                                       getTensorData<uint8_t>(input()),   //
+                                       getTensorShape(filter()),          //
+                                       getTensorData<uint8_t>(filter()),  //
+                                       getTensorShape(bias()),            //
+                                       getTensorData<int32_t>(bias()),    //
+                                       getTensorShape(output()),          //
+                                       getTensorData<uint8_t>(output()));
+}
+
+// int8 per-channel path: builds per-channel multiplier/shift arrays and delegates to the
+// PAL DepthwiseConvPerChannel<int8_t> implementation, passing the scratchpad buffer when
+// it is allocatable.
+void DepthwiseConv2D::evalQuantizedS8PerChannel() const
+{
+  // Split the per-channel quantization parameters into two parallel arrays.
+  const std::vector<double> channel_scales =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+  std::vector<ChannelQuantMultipliers> channel_quant = quantizeMultipliers(channel_scales);
+
+  std::vector<int32_t> shifts;
+  std::vector<int32_t> multipliers;
+  for (const ChannelQuantMultipliers &cm : channel_quant)
+  {
+    shifts.push_back(cm.shift);
+    multipliers.push_back(cm.multiplier);
+  }
+
+  int32_t act_min{};
+  int32_t act_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &act_min, &act_max);
+
+  tflite::DepthwiseParams op_params{};
+  op_params.quantized_activation_min = act_min;
+  op_params.quantized_activation_max = act_max;
+  op_params.output_multiplier = 1; // unused in tflite code
+  op_params.output_shift = 0;      // unused in tflite code
+  // The kernel expects input and filter zero points to be negated.
+  op_params.input_offset = -input()->zero_point(); // Note the '-'.
+  op_params.weights_offset = 0;
+  op_params.output_offset = output()->zero_point();
+  op_params.depth_multiplier = _params.depth_multiplier;
+  op_params.stride_width = _params.stride_width;
+  op_params.stride_height = _params.stride_height;
+  op_params.dilation_width_factor = _params.dilation_width_factor;
+  op_params.dilation_height_factor = _params.dilation_height_factor;
+  op_params.padding_values.width = _padding_width;
+  op_params.padding_values.height = _padding_height;
+  op_params.padding_type = tflite::PaddingType::kSame;
+
+  // The scratchpad is the second output tensor; its data pointer is only fetched when the
+  // tensor is allocatable, otherwise nullptr is passed.
+  auto scratchpad = getOutputTensors()[1];
+  int8_t *scratchpad_data =
+    scratchpad->is_allocatable() ? scratchpad->data<int8_t>() : nullptr;
+
+  luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
+    op_params, multipliers.data(), shifts.data(), getTensorShape(input()),
+    getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+    getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+// int16 path: a hand-written depthwise convolution that accumulates in int64, rescales
+// each output channel with its multiplier/shift and clamps to the activation range.
+// No zero-point offsets are applied in this path.
+void DepthwiseConv2D::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ const auto *filter_data = getTensorData<int16_t>(filter());
+ const auto *bias_data = getTensorData<int64_t>(bias());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+ const int32_t depth_multiplier = _params.depth_multiplier;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+ quantizeMultipliers(effective_output_scales);
+
+ // NOTE(review): BroadcastableWrapper presumably lets a single-element list be indexed by
+ // any channel - confirm against its definition.
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ for (int32_t m = 0; m < depth_multiplier; ++m)
+ {
+ // Each input channel produces depth_multiplier consecutive output channels.
+ const int32_t out_c = m + in_c * depth_multiplier;
+ // Top-left input coordinate covered by this output position (may be negative
+ // because of padding).
+ const int32_t in_y_origin = out_y * stride_height - _padding_height;
+ const int32_t in_x_origin = out_x * stride_width - _padding_width;
+ int64_t acc = 0;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+ const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+ // Positions outside the image are skipped, which acts as zero padding.
+ if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+ {
+ const int16_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int16_t filter_val =
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
+ // Widen both operands to int64 before multiplying.
+ acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ }
+ }
+ }
+ // Bias is optional; bias_data is only dereferenced when it is non-null.
+ if (bias_data != nullptr)
+ {
+ acc += bias_data[out_c];
+ }
+
+ // Requantize the 64-bit accumulator with this channel's multiplier and shift.
+ int32_t output_multiplier = quant_multipliers[out_c].multiplier;
+ int output_shift = quant_multipliers[out_c].shift;
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ // The clamped int32 value is narrowed implicitly to the int16 output element.
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/Utils.h"
+#include "PALDequantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `input` as the only input tensor and `output` as the only output tensor.
+Dequantize::Dequantize(const Tensor *input, Tensor *output)
+  : Kernel({input}, {output})
+{
+}
+
+// Checks that the input is S8/U8/S16 with exactly one scale (and a zero point of 0 for
+// S16), that the output is FLOAT32, and resizes the output to the input shape.
+void Dequantize::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 ||
+                         input()->element_type() == DataType::U8 ||
+                         input()->element_type() == DataType::S16);
+
+  LUCI_INTERPRETER_CHECK(input()->scales().size() == 1);
+
+  if (input()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+  }
+
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto &source_shape = input()->shape();
+  output()->resize(source_shape);
+}
+
+// Runs the PAL Dequantize implementation for the input element type (U8, S8 or S16),
+// using the input tensor's zero point and scale.
+void Dequantize::execute() const
+{
+  tflite::DequantizationParams dequant_params;
+  dequant_params.scale = input()->scale();
+  dequant_params.zero_point = input()->zero_point();
+
+  switch (input()->element_type())
+  {
+    case DataType::U8:
+      luci_interpreter_pal::Dequantize(dequant_params, getTensorShape(input()),
+                                       getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+      break;
+
+    case DataType::S8:
+      luci_interpreter_pal::Dequantize(dequant_params, getTensorShape(input()),
+                                       getTensorData<int8_t>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+      break;
+
+    case DataType::S16:
+      luci_interpreter_pal::Dequantize(dequant_params, getTensorShape(input()),
+                                       getTensorData<int16_t>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+      break;
+
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Div.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/div.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers {input1, input2} as input tensors and `output` as the only output tensor, and
+// forwards `params` to the KernelWithParams base.
+Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params)
+  : KernelWithParams<DivParams>({input1, input2}, {output}, params)
+{
+}
+
+// Requires both inputs and the output to share one element type, then resizes the output
+// to the broadcast of the two input shapes.
+void Div::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(input1()->shape(), input2()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Dispatches on the element type of the first input: FLOAT32, S64, S32 or U8; any other
+// type only trips the assertion.
+void Div::execute() const
+{
+  const auto element_type = input1()->element_type();
+
+  if (element_type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (element_type == DataType::S64)
+  {
+    evalInteger<int64_t>();
+  }
+  else if (element_type == DataType::S32)
+  {
+    evalInteger<int32_t>();
+  }
+  else if (element_type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float division: fills the activation range, lets ProcessBroadcastShapes decide whether
+// broadcasting is required, and calls the matching TFLite reference routine.
+void Div::evalFloat() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<float>(params, _params.activation);
+
+  // ProcessBroadcastShapes receives a pointer to params and returns whether the slow
+  // broadcasting implementation must be used.
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastDivSlow(
+      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<float>(input1()),
+                               getTensorShape(input2()), getTensorData<float>(input2()),
+                               getTensorShape(output()), getTensorData<float>(output()));
+  }
+}
+
+// Integer division for element type T (instantiated by execute() with int64_t and
+// int32_t): same flow as the float path, with the activation range filled for T.
+template <typename T> void Div::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  // ProcessBroadcastShapes receives a pointer to params and returns whether the slow
+  // broadcasting implementation must be used.
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastDivSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
+// uint8 quantized division: derives the output multiplier/shift from
+// input1_scale / (input2_scale * output_scale), fills the quantized arithmetic
+// parameters and calls the matching TFLite reference routine.
+void Div::evalQuantized() const
+{
+  const auto input1_scale = static_cast<double>(input1()->scale());
+  const auto input2_scale = static_cast<double>(input2()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  const double real_output_multiplier = input1_scale / (input2_scale * output_scale);
+
+  int32_t output_multiplier{};
+  int output_shift{};
+
+  quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ArithmeticParams params{};
+
+  params.input1_offset = -input1()->zero_point(); // Note the '-'.
+  params.input2_offset = -input2()->zero_point(); // Note the '-'.
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  // ProcessBroadcastShapes receives a pointer to params and returns whether the slow
+  // broadcasting implementation must be used.
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastDivSlow(
+      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+                               getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Elu.h"
+#include "kernels/Utils.h"
+
+#include "PALElu.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `input` as the only input tensor and `output` as the only output tensor.
+Elu::Elu(const Tensor *input, Tensor *output)
+  : Kernel({input}, {output})
+{
+}
+
+// Requires matching input/output element types and gives the output the input's shape.
+void Elu::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto &source_shape = input()->shape();
+  output()->resize(source_shape);
+}
+
+// Runs the PAL Elu implementation for FLOAT32 input; any other type only trips the
+// assertion.
+void Elu::execute() const
+{
+  if (input()->element_type() == DataType::FLOAT32)
+  {
+    luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()),
+                              getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Equal.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers {x, y} as input tensors and `output` as the only output tensor.
+Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+// Requires x and y to share an element type and the output to be BOOL; for U8 inputs it
+// precomputes the multiplier/shift pairs from the two input scales; finally resizes the
+// output to the broadcast of the two input shapes.
+void Equal::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  const bool is_quantized = x()->element_type() == DataType::U8;
+  if (is_quantized)
+  {
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(x()->shape(), y()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Picks the evaluation routine that matches the element type of the first operand.
+void Equal::execute() const
+{
+  const auto operand_type = x()->element_type();
+
+  if (operand_type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (operand_type == DataType::S64)
+  {
+    evalInteger<int64_t>();
+  }
+  else if (operand_type == DataType::S32)
+  {
+    evalInteger<int32_t>();
+  }
+  else if (operand_type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Element-wise equality for FLOAT32 operands; the broadcasting variant is used whenever the
+// two operand shapes differ.
+void Equal::evalFloat() const
+{
+  const auto lhs_data = getTensorData<float>(x());
+  const auto rhs_data = getTensorData<float>(y());
+  auto result_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (!op_params.is_broadcast)
+  {
+    tflite::reference_ops::Equal(op_params, getTensorShape(x()), lhs_data, getTensorShape(y()),
+                                 rhs_data, getTensorShape(output()), result_data);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), lhs_data,
+                                              getTensorShape(y()), rhs_data,
+                                              getTensorShape(output()), result_data);
+}
+
+// Element-wise equality for integer operands of type T (no rescaling); the broadcasting
+// variant is used whenever the two operand shapes differ.
+template <typename T> void Equal::evalInteger() const
+{
+  const auto lhs_data = getTensorData<T>(x());
+  const auto rhs_data = getTensorData<T>(y());
+  auto result_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (!op_params.is_broadcast)
+  {
+    tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), lhs_data,
+                                          getTensorShape(y()), rhs_data,
+                                          getTensorShape(output()), result_data);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), lhs_data,
+                                                       getTensorShape(y()), rhs_data,
+                                                       getTensorShape(output()), result_data);
+}
+
+// Element-wise equality for U8 operands. Each operand gets its negated zero point as offset
+// plus the multiplier/shift pair stored in the kernel; the broadcasting variant is used
+// whenever the two operand shapes differ.
+void Equal::evalQuantized() const
+{
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  // First operand.
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input1_shift = _x_shift;
+
+  // Second operand.
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.input2_shift = _y_shift;
+
+  const auto lhs_data = getTensorData<uint8_t>(x());
+  const auto rhs_data = getTensorData<uint8_t>(y());
+  auto result_data = getTensorData<bool>(output());
+
+  if (!op_params.is_broadcast)
+  {
+    tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), lhs_data,
+                                            getTensorShape(y()), rhs_data,
+                                            getTensorShape(output()), result_data);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), lhs_data,
+                                                         getTensorShape(y()), rhs_data,
+                                                         getTensorShape(output()), result_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Exp.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/exp.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `input` as the kernel's only input and `output` as its only output.
+Exp::Exp(const Tensor *input, Tensor *output)
+  : Kernel({input}, {output})
+{
+}
+
+// Checks that input and output share an element type, then sizes the output like the input.
+void Exp::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto &input_shape = input()->shape();
+  output()->resize(input_shape);
+}
+
+// Evaluates the kernel for FLOAT32 input; any other element type trips the assert.
+void Exp::execute() const
+{
+  if (input()->element_type() == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Applies the reference Exp op over the flat element range shared by input and output.
+void Exp::evalFloat() const
+{
+  const auto input_shape = getTensorShape(input());
+  const auto output_shape = getTensorShape(output());
+  const int flat_size = tflite::MatchingFlatSize(input_shape, output_shape);
+
+  tflite::reference_ops::Exp(getTensorData<float>(input()), flat_size,
+                             getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+
+// Validates an ExpandDims operator before execution.
+//
+// cur_op        - operator whose inputs are (data tensor, axis tensor) and whose single
+//                 output is the expanded tensor.
+// runtime_graph - graph used to resolve tensor indices and to read the constant axis data.
+//
+// The axis is read from constant data as S32 or S64; a negative axis is normalized by adding
+// num_dims(input) + 1, and the result must lie in [0, num_dims(input)].
+void configure_kernel_CircleExpandDims(const circle::Operator *cur_op,
+                                       BaseRuntimeGraph *runtime_graph)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto axis_index = cur_op->inputs()->operator[](1);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(axis_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto axis = runtime_graph->getCircleTensorByIndex(axis_index);
+  auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(axis != nullptr);
+  assert(output != nullptr);
+
+  auto axis_data = runtime_graph->getConstDataByTensor(axis);
+  // The axis value is dereferenced below, so it must be backed by constant data.
+  assert(axis_data != nullptr);
+
+  // Held in 64 bits so that an S64 axis is range-checked as stored instead of being truncated
+  // to 32 bits first. Zero-initialized so the value is never indeterminate when asserts are
+  // compiled out.
+  int64_t axis_value = 0;
+
+  switch (Tensor::element_type(axis))
+  {
+    case DataType::S32:
+      axis_value = *reinterpret_cast<int32_t *>(axis_data);
+      break;
+    case DataType::S64:
+      axis_value = *reinterpret_cast<int64_t *>(axis_data);
+      break;
+    default:
+      assert(false && "Unsupported type.");
+      // Nothing meaningful can be validated for an axis of an unsupported type.
+      return;
+  }
+
+  // A negative axis counts from the end of the *output* rank, hence the extra + 1.
+  if (axis_value < 0)
+  {
+    axis_value += Tensor::num_dims(input) + 1;
+  }
+
+  LUCI_INTERPRETER_CHECK(axis_value <= Tensor::num_dims(input) and axis_value >= 0);
+}
+
+// Executes ExpandDims. The element data is unchanged by this op, so the kernel either hands
+// the input/output pair to makeInplaceOperation (when is_inplace is set) or copies the input
+// bytes into the output buffer.
+void execute_kernel_CircleExpandDims(const circle::Operator *cur_op,
+                                     BaseRuntimeGraph *runtime_graph, bool is_inplace)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  if (!is_inplace)
+  {
+    // Just copy input to output
+    const auto input_data = runtime_graph->getDataByTensor(input);
+    auto output_data = runtime_graph->getDataByTensor(output);
+
+    assert(input_data != nullptr);
+    assert(output_data != nullptr);
+
+    const size_t element_size = getDataTypeSize(Tensor::element_type(input));
+    const int32_t num_elements = Tensor::num_elements(input);
+    std::memcpy(output_data, input_data, num_elements * element_size);
+    return;
+  }
+
+  runtime_graph->makeInplaceOperation(input, output);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO enable it
+#if 0
+#include "kernels/ExpandDims.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture for the ExpandDims kernel tests: SetUp() creates a fresh TestMemoryManager
+// that the tests use to build input tensors and to allocate the output tensor.
+// NOTE: this fixture sits inside the surrounding `#if 0` region and is currently not compiled.
+class ExpandDimsTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ // Memory manager handed to makeInputTensor() and used for allocate_memory() in the tests.
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Axis 0 on a {2, 2} S32 input: the data must come through unchanged and the output shape
+// must be {1, 2, 2}.
+TEST_F(ExpandDimsTest, PositiveAxis)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2}));
+}
+
+// Axis -1 on a {2, 2} S32 input: the data must come through unchanged and the new dimension
+// must be appended at the end, giving shape {2, 2, 1}.
+TEST_F(ExpandDimsTest, NegAxis)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {-1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1}));
+}
+
+// Negative test: an axis tensor of type FLOAT32 must make configure() throw.
+TEST_F(ExpandDimsTest, InvalidAxisType_NEG)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<float> axis_value = {1.0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative test: axis 3 is out of range for a rank-2 input, so configure() must throw.
+TEST_F(ExpandDimsTest, InvalidAxisValue_NEG)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {3};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/Utils.h"
+#include "PALFill.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `dims` (requested output shape) and `value` (fill value) as kernel inputs and
+// `output` as the tensor to be filled.
+Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output)
+  : Kernel({dims, value}, {output}) {}
+
+// Reads the requested output shape from the `dims` tensor (elements of type T) and resizes
+// the output accordingly. A negative extent trips the assert.
+template <typename T> void Fill::configureShape()
+{
+  const auto dims_data = getTensorData<T>(dims());
+
+  // The rank of the output equals the length of the 1-D `dims` tensor.
+  Shape output_shape(dims()->shape().dim(0));
+  const int rank = output_shape.num_dims();
+
+  for (int axis = 0; axis < rank; ++axis)
+  {
+    const T extent = dims_data[axis];
+    if (extent < 0)
+    {
+      assert(false && "Fill dimensions must be >= 0");
+    }
+
+    output_shape.dim(axis) = extent;
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(output_shape);
+}
+
+// Validates the Fill inputs and resizes the output.
+//
+// Requirements checked here:
+//  - `dims` is a 1-D tensor of type S32 or S64;
+//  - `value` is a scalar (rank 0);
+//  - for S8/S16 values, scale and zero point of `value` and `output` match, and an S16 value
+//    has zero point 0.
+// The output shape is then taken from the contents of `dims`.
+void Fill::configure()
+{
+  const auto dims_shape = dims()->shape();
+  const auto value_shape = value()->shape();
+
+  // Make sure the 1st input tensor is 1-D
+  LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1);
+
+  // Make sure the 1st input tensor is int32 or int64
+  LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or
+                         dims()->element_type() == DataType::S64);
+
+  // Make sure the 2nd input tensor is a scalar.
+  // The explicit ';' keeps this statement well-formed however the check macro expands.
+  LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0);
+
+  // Check zero point and scale for S16 and S8
+  if (value()->element_type() == DataType::S16 or value()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale());
+    LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point());
+
+    // Braced so that a check macro that itself expands to an `if` cannot capture a later else.
+    if (value()->element_type() == DataType::S16)
+    {
+      LUCI_INTERPRETER_CHECK(value()->zero_point() == 0);
+    }
+  }
+
+  // Resize output
+  switch (dims()->element_type())
+  {
+    case DataType::S32:
+      configureShape<int32_t>();
+      break;
+    case DataType::S64:
+      configureShape<int64_t>();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+// Fills the output tensor with the scalar `value`, dispatching on the output element type.
+void Fill::execute() const
+{
+  // Runs the reference Fill op with both buffers viewed as the type of `type_tag`.
+  const auto fill_as = [this](auto type_tag) {
+    using ElementT = decltype(type_tag);
+    tflite::reference_ops::Fill(getTensorShape(this->value()),
+                                getTensorData<ElementT>(this->value()),
+                                getTensorShape(this->output()),
+                                getTensorData<ElementT>(this->output()));
+  };
+
+  switch (output()->element_type())
+  {
+    case DataType::S8:
+      fill_as(int8_t{});
+      break;
+    case DataType::S16:
+      fill_as(int16_t{});
+      break;
+    case DataType::S32:
+      fill_as(int32_t{});
+      break;
+    case DataType::S64:
+      fill_as(int64_t{});
+      break;
+    case DataType::FLOAT32:
+      fill_as(float{});
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/floor.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `input` as the kernel's only input and `output` as its only output.
+Floor::Floor(const Tensor *input, Tensor *output)
+  : Kernel({input}, {output})
+{
+}
+
+// Checks that input and output share an element type, then sizes the output like the input.
+void Floor::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto &input_shape = input()->shape();
+  output()->resize(input_shape);
+}
+
+// Evaluates the kernel for FLOAT32 input; any other element type trips the assert.
+void Floor::execute() const
+{
+  if (input()->element_type() == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Runs the reference Floor op from the input buffer into the output buffer.
+void Floor::evalFloat() const
+{
+  const auto input_data = getTensorData<float>(input());
+  auto output_data = getTensorData<float>(output());
+
+  tflite::reference_ops::Floor(getTensorShape(input()), input_data, getTensorShape(output()),
+                               output_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers the two operands as kernel inputs and `output` as the result tensor.
+// NOTE(review): the parameters are named `input`/`alpha`, while the other methods reach the
+// operands through x()/y() - presumably these map to the same two inputs in order; confirm
+// against the header.
+FloorDiv::FloorDiv(const Tensor *input, const Tensor *alpha, Tensor *output)
+  : Kernel({input, alpha}, {output}) {}
+
+// Requires both operands to have the output's element type and resizes the output to the
+// broadcast of the two operand shapes.
+void FloorDiv::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type());
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(x()->shape(), y()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Evaluates the kernel for FLOAT32 operands; any other element type trips the assert.
+void FloorDiv::execute() const
+{
+  if (x()->element_type() == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Computes floor(x / y) element-wise. Every divisor is checked to be non-zero first; the
+// broadcasting variant is used whenever the two operand shapes differ.
+void FloorDiv::evalFloat() const
+{
+  // The quotient is formed in double precision and floored before narrowing back to float.
+  auto floor_div = [](float lhs, float rhs) -> float {
+    const double quotient = static_cast<double>(lhs) / static_cast<double>(rhs);
+    return std::floor(quotient);
+  };
+
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+
+  // Check the denominator
+  const int divisor_count = getTensorShape(y()).FlatSize();
+  for (int i = 0; i < divisor_count; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(y_data[i] != 0);
+  }
+
+  const bool same_shape = !(x()->shape() != y()->shape());
+  if (same_shape)
+  {
+    tflite::reference_ops::BinaryFunction<float, float, float>(
+      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<float>(output()), floor_div);
+    return;
+  }
+
+  tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+    getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+    getTensorData<float>(output()), floor_div);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PALFullyConnected.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+// FLOAT32 path of FullyConnected: derives the activation clamp range from the operator's
+// fused activation, fetches the tensor buffers from the runtime graph and runs the reference
+// FullyConnected op.
+void evalFloat(const circle::Tensor *input, const circle::Tensor *weights,
+               const circle::Tensor *bias, const circle::Tensor *output,
+               const circle::FullyConnectedOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  float clamp_min{};
+  float clamp_max{};
+  kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()),
+                                    &clamp_min, &clamp_max);
+
+  tflite::FullyConnectedParams fc_params{};
+  fc_params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
+  fc_params.float_activation_min = clamp_min;
+  fc_params.float_activation_max = clamp_max;
+
+  // Activations come from the runtime buffers, weights and bias from constant data.
+  auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  auto *weights_data = runtime_graph->getConstDataByTensor(weights);
+  auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  assert(input_data != nullptr);
+  assert(weights_data != nullptr);
+  assert(output_data != nullptr);
+
+  tflite::reference_ops::FullyConnected(
+    fc_params, kernels::getTensorShape(input), kernels::getTensorData<float>(input_data),
+    kernels::getTensorShape(weights), kernels::getTensorData<float>(weights_data),
+    kernels::getTensorShape(bias), kernels::getTensorData<float>(bias_data),
+    kernels::getTensorShape(output), kernels::getTensorData<float>(output_data));
+}
+
+#ifndef DIS_QUANT
+// Builds the FullyConnected parameters shared by the U8 and S8 quantized paths:
+//  - output multiplier/shift derived from the input, weights and output scales;
+//  - quantized activation clamp range for the operator's fused activation;
+//  - negated input/weights zero points as offsets, output zero point as output offset.
+tflite::FullyConnectedParams makeQuantizedParams(const circle::Tensor *input,
+                                                 const circle::Tensor *weights,
+                                                 const circle::Tensor *output,
+                                                 const circle::FullyConnectedOptions *options)
+{
+  int output_shift = 0;
+  int32_t output_multiplier = 0;
+  const double real_multiplier = kernels::getQuantizedConvolutionMultipler(
+    Tensor::scale(input), Tensor::scale(weights), Tensor::scale(output));
+  kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+  int32_t output_activation_min = 0;
+  int32_t output_activation_max = 0;
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &output_activation_min,
+                                             &output_activation_max);
+
+  tflite::FullyConnectedParams op_params{};
+  op_params.input_offset = -Tensor::zero_point(input);
+  op_params.weights_offset = -Tensor::zero_point(weights);
+  op_params.output_offset = Tensor::zero_point(output);
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.lhs_cacheable = false;
+  op_params.rhs_cacheable = false;
+  return op_params;
+}
+
+// U8 path of FullyConnected: runs the reference op on uint8 activations/weights with an
+// int32 bias.
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *weights,
+                   const circle::Tensor *bias, const circle::Tensor *output,
+                   const circle::FullyConnectedOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  const tflite::FullyConnectedParams op_params =
+    makeQuantizedParams(input, weights, output, options);
+
+  auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  auto *weights_data = runtime_graph->getConstDataByTensor(weights);
+  auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  assert(input_data != nullptr);
+  assert(weights_data != nullptr);
+  assert(output_data != nullptr);
+
+  tflite::reference_ops::FullyConnected(
+    op_params, kernels::getTensorShape(input), kernels::getTensorData<uint8_t>(input_data),
+    kernels::getTensorShape(weights), kernels::getTensorData<uint8_t>(weights_data),
+    kernels::getTensorShape(bias), kernels::getTensorData<int32_t>(bias_data),
+    kernels::getTensorShape(output), kernels::getTensorData<uint8_t>(output_data));
+}
+
+// S8 path of FullyConnected: runs the PAL implementation on int8 activations/weights with an
+// int32 bias.
+void evalQuantizedS8(const circle::Tensor *input, const circle::Tensor *weights,
+                     const circle::Tensor *bias, const circle::Tensor *output,
+                     const circle::FullyConnectedOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  const tflite::FullyConnectedParams op_params =
+    makeQuantizedParams(input, weights, output, options);
+
+  auto *input_data = runtime_graph->getDataByTensor(input);
+  auto *output_data = runtime_graph->getDataByTensor(output);
+
+  auto *weights_data = runtime_graph->getConstDataByTensor(weights);
+  auto *bias_data = runtime_graph->getConstDataByTensor(bias);
+
+  assert(input_data != nullptr);
+  assert(weights_data != nullptr);
+  assert(output_data != nullptr);
+
+  luci_interpreter_pal::FullyConnected<int8_t>(
+    op_params, kernels::getTensorShape(input), kernels::getTensorData<int8_t>(input_data),
+    kernels::getTensorShape(weights), kernels::getTensorData<int8_t>(weights_data),
+    kernels::getTensorShape(bias), kernels::getTensorData<int32_t>(bias_data),
+    kernels::getTensorShape(output), kernels::getTensorData<int8_t>(output_data));
+}
+#endif
+
+} // namespace
+
+// TODO: think about how to remove the unused parameter
+// Validates a FullyConnected operator before execution.
+//
+// Checks performed:
+//  - input, weights and output resolve to tensors (bias is optional and may be null);
+//  - input, output and bias element types match what the weights element type requires
+//    (FLOAT32 -> FLOAT32 bias, U8/S8 -> S32 bias);
+//  - weights are 2-D, bias (if present) has as many elements as dim 0 of weights, and the
+//    input element count is a multiple of dim 1 of weights;
+//  - keep_num_dims is not requested (not handled yet).
+//
+// The type dispatch is a switch, like the one in execute_kernel_CircleFullyConnected, so that
+// it stays well-formed when DIS_FLOAT and/or DIS_QUANT compile some of the branches out.
+void configure_kernel_CircleFullyConnected(const circle::Operator *cur_op,
+                                           BaseRuntimeGraph *runtime_graph)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto weight_index = cur_op->inputs()->operator[](1);
+  const auto bias_index = cur_op->inputs()->operator[](2);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(weight_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto weights = runtime_graph->getCircleTensorByIndex(weight_index);
+  const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(weights != nullptr);
+  assert(output != nullptr);
+
+  switch (Tensor::element_type(weights))
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::FLOAT32);
+      LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::FLOAT32);
+      LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::FLOAT32);
+      break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::U8:
+      LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::U8);
+      LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::U8);
+      LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::S32);
+      break;
+    case DataType::S8:
+      LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == DataType::S8);
+      LUCI_INTERPRETER_CHECK(Tensor::element_type(output) == DataType::S8);
+      LUCI_INTERPRETER_CHECK(!bias || Tensor::element_type(bias) == DataType::S32);
+      break;
+#endif // DIS_QUANT
+    default:
+      assert(false && "Unsupported type.");
+  }
+
+  LUCI_INTERPRETER_CHECK(Tensor::num_dims(weights) == 2);
+  // A missing bias is accepted; a present bias must match dim 0 of weights.
+  LUCI_INTERPRETER_CHECK(bias == nullptr || Tensor::num_elements(bias) == Tensor::dim(weights, 0));
+  LUCI_INTERPRETER_CHECK(Tensor::num_elements(input) % Tensor::dim(weights, 1) == 0);
+
+  const auto *options = cur_op->builtin_options_as_FullyConnectedOptions();
+
+  // TODO: handle with it
+  assert(options->keep_num_dims() == false);
+}
+
+// TODO: think about how to remove the unused parameter
+// Executes a FullyConnected operator: resolves the input, weights, bias and output tensors
+// and forwards them to the evaluation routine that matches the input element type. The
+// trailing bool parameter is not used.
+void execute_kernel_CircleFullyConnected(const circle::Operator *cur_op,
+                                         BaseRuntimeGraph *runtime_graph, bool)
+{
+  const auto *op_inputs = cur_op->inputs();
+  const auto *op_outputs = cur_op->outputs();
+
+  const auto input_index = op_inputs->operator[](0);
+  const auto weight_index = op_inputs->operator[](1);
+  const auto bias_index = op_inputs->operator[](2);
+  const auto output_index = op_outputs->operator[](0);
+
+  assert(input_index != -1);
+  assert(weight_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto weights = runtime_graph->getCircleTensorByIndex(weight_index);
+  const auto bias = runtime_graph->getCircleTensorByIndex(bias_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  assert(input != nullptr);
+  assert(weights != nullptr);
+  assert(output != nullptr);
+
+  const auto *options = cur_op->builtin_options_as_FullyConnectedOptions();
+  const auto input_type = Tensor::element_type(input);
+
+  switch (input_type)
+  {
+#ifndef DIS_QUANT
+    case DataType::U8:
+      evalQuantized(input, weights, bias, output, options, runtime_graph);
+      break;
+    case DataType::S8:
+      evalQuantizedS8(input, weights, bias, output, options, runtime_graph);
+      break;
+#endif // DIS_QUANT
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      evalFloat(input, weights, bias, output, options, runtime_graph);
+      break;
+#endif // DIS_FLOAT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// TODO enable it
+#if 0
+#include "kernels/FullyConnected.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Generic test helper: builds FLOAT32 input, weights and bias tensors from the given shapes
+// and data, runs a FullyConnected kernel with RELU activation, and expects the output shape
+// and data to match `output_shape` / `output_data`.
+// T is only used as the element type when extracting the output data.
+// NOTE: this helper sits inside the surrounding `#if 0` region and is currently not compiled.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+ std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+// Signed 8-bit quantized variant of Check.
+// Input and weights share one set of quantization parameters derived from [-63.5, 64]; the
+// S32 bias uses scale = input_scale * input_scale with zero point 0; the output is quantized
+// over [-127, 128]. Results are dequantized and compared within a quantization tolerance.
+template <>
+void Check<int8_t>(std::initializer_list<int32_t> input_shape,
+                   std::initializer_list<int32_t> weights_shape,
+                   std::initializer_list<int32_t> bias_shape,
+                   std::initializer_list<int32_t> output_shape,
+                   std::initializer_list<float> input_data,
+                   std::initializer_list<float> weights_data,
+                   std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> manager = std::make_unique<TestMemoryManager>();
+  IMemoryManager *const mm = manager.get();
+
+  const float tolerance = getTolerance(-127, 128, 255);
+  std::pair<float, int32_t> in_quant = quantizationParams<int8_t>(-63.5, 64);
+  std::pair<float, int32_t> out_quant = quantizationParams<int8_t>(-127, 128);
+
+  Tensor in =
+    makeInputTensor<DataType::S8>(input_shape, in_quant.first, in_quant.second, input_data, mm);
+  Tensor weights = makeInputTensor<DataType::S8>(weights_shape, in_quant.first, in_quant.second,
+                                                 weights_data, mm);
+  Tensor bias = makeInputTensor<DataType::S32>(bias_shape, in_quant.first * in_quant.first, 0,
+                                               bias_data, mm);
+  Tensor out = makeOutputTensor(DataType::S8, out_quant.first, out_quant.second);
+
+  FullyConnectedParams fc_params{};
+  fc_params.activation = Activation::RELU;
+
+  FullyConnected kernel(&in, &weights, &bias, &out, fc_params);
+  kernel.configure();
+  // The output buffer is allocated only after configure() has run.
+  manager->allocate_memory(out);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(out), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(out), FloatArrayNear(output_data, tolerance));
+}
+
+// Unsigned 8-bit quantized variant of Check.
+// Input and weights share one set of quantization parameters derived from [-63.5, 64]; the
+// S32 bias uses scale = input_scale * input_scale with zero point 0; the output is quantized
+// over [-127, 128]. Results are dequantized and compared within a quantization tolerance.
+template <>
+void Check<uint8_t>(
+  std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+  std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+  std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+  std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> manager = std::make_unique<TestMemoryManager>();
+  IMemoryManager *const mm = manager.get();
+
+  const float tolerance = getTolerance(-127, 128, 255);
+  std::pair<float, int32_t> in_quant = quantizationParams<uint8_t>(-63.5, 64);
+  std::pair<float, int32_t> out_quant = quantizationParams<uint8_t>(-127, 128);
+
+  Tensor in =
+    makeInputTensor<DataType::U8>(input_shape, in_quant.first, in_quant.second, input_data, mm);
+  Tensor weights = makeInputTensor<DataType::U8>(weights_shape, in_quant.first, in_quant.second,
+                                                 weights_data, mm);
+  Tensor bias = makeInputTensor<DataType::S32>(bias_shape, in_quant.first * in_quant.first, 0,
+                                               bias_data, mm);
+  Tensor out = makeOutputTensor(DataType::U8, out_quant.first, out_quant.second);
+
+  FullyConnectedParams fc_params{};
+  fc_params.activation = Activation::RELU;
+
+  FullyConnected kernel(&in, &weights, &bias, &out, fc_params);
+  kernel.configure();
+  // The output buffer is allocated only after configure() has run.
+  manager->allocate_memory(out);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(out), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(out), FloatArrayNear(output_data, tolerance));
+}
+
+// Stateless typed fixture; it exists only so the suite below can be instantiated once per
+// element type.
+template <typename T> class FullyConnectedTest : public ::testing::Test
+{
+};
+
+// Element types for which each TYPED_TEST of FullyConnectedTest is instantiated.
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
+
+// Happy path for every type in DataTypes: a {3, 2, 2, 1} input (12 values, commented as two
+// batches of six) against {3, 6} weights and a 3-element bias, expecting a {2, 3} output.
+// Argument order follows Check: shapes first (input, weights, bias, output), then the data
+// in the same order. Check applies RELU, hence the zeros in the expected output.
+TYPED_TEST(FullyConnectedTest, Simple)
+{
+ Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
+ {
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
+ },
+ {
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
+ },
+ {-1, -5, -8},
+ {
+ 0, 0, 32, // batch = 0
+ 22, 11, 47, // batch = 1
+ });
+}
+
+// Negative case: FLOAT32 input and weights combined with an S32 bias tensor.
+// configure() is expected to throw.
+TEST(FullyConnectedTest, InvalidBiasType_NEG)
+{
+  const Shape in_shape{3, 2, 2, 1};
+  const std::vector<float> in_values{
+    -3, -5, 5,  4, 9,  -2, // batch = 0
+    -3, -2, -4, 9, -8, 1,  // batch = 1
+  };
+  const Shape w_shape{3, 6};
+  const std::vector<float> w_values{
+    -3, -7, 4, -4, -6, 4,  // unit = 0
+    3,  5,  2, 3,  -3, -8, // unit = 1
+    -3, 7,  4, 9,  0,  -5, // unit = 2
+  };
+  const Shape b_shape{3};
+  // Integer bias values: this is the deliberately wrong part of the setup.
+  const std::vector<int32_t> b_values{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> manager = std::make_unique<TestMemoryManager>();
+  IMemoryManager *const mm = manager.get();
+
+  Tensor in = makeInputTensor<DataType::FLOAT32>(in_shape, in_values, mm);
+  Tensor weights = makeInputTensor<DataType::FLOAT32>(w_shape, w_values, mm);
+  Tensor bias = makeInputTensor<DataType::S32>(b_shape, b_values, mm);
+  Tensor out = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams fc_params{};
+  fc_params.activation = Activation::RELU;
+
+  FullyConnected kernel(&in, &weights, &bias, &out, fc_params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: the weights tensor is given a rank-3 shape {1, 3, 6}.
+// configure() is expected to throw.
+TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
+{
+  const Shape in_shape{3, 2, 2, 1};
+  const std::vector<float> in_values{
+    -3, -5, 5,  4, 9,  -2, // batch = 0
+    -3, -2, -4, 9, -8, 1,  // batch = 1
+  };
+  // Rank-3 weights: this is the deliberately wrong part of the setup.
+  const Shape w_shape{1, 3, 6};
+  const std::vector<float> w_values{
+    -3, -7, 4, -4, -6, 4,  // unit = 0
+    3,  5,  2, 3,  -3, -8, // unit = 1
+    -3, 7,  4, 9,  0,  -5, // unit = 2
+  };
+  const Shape b_shape{3};
+  const std::vector<float> b_values{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> manager = std::make_unique<TestMemoryManager>();
+  IMemoryManager *const mm = manager.get();
+
+  Tensor in = makeInputTensor<DataType::FLOAT32>(in_shape, in_values, mm);
+  Tensor weights = makeInputTensor<DataType::FLOAT32>(w_shape, w_values, mm);
+  Tensor bias = makeInputTensor<DataType::FLOAT32>(b_shape, b_values, mm);
+  Tensor out = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams fc_params{};
+  fc_params.activation = Activation::RELU;
+
+  FullyConnected kernel(&in, &weights, &bias, &out, fc_params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: the weights are shaped {6, 3} while the bias holds only 3 elements, so the
+// bias element count does not match the first weights dimension.
+// configure() is expected to throw.
+TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
+{
+  const Shape in_shape{3, 2, 2, 1};
+  const std::vector<float> in_values{
+    -3, -5, 5,  4, 9,  -2, // batch = 0
+    -3, -2, -4, 9, -8, 1,  // batch = 1
+  };
+  const Shape w_shape{6, 3};
+  const std::vector<float> w_values{
+    -3, -7, 4,  // unit = 0
+    -4, -6, 4,  // unit = 1
+    3,  5,  2,  // unit = 2
+    3,  -3, -8, // unit = 3
+    -3, 7,  4,  // unit = 4
+    9,  0,  -5, // unit = 5
+  };
+  const Shape b_shape{3};
+  const std::vector<float> b_values{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> manager = std::make_unique<TestMemoryManager>();
+  IMemoryManager *const mm = manager.get();
+
+  Tensor in = makeInputTensor<DataType::FLOAT32>(in_shape, in_values, mm);
+  Tensor weights = makeInputTensor<DataType::FLOAT32>(w_shape, w_values, mm);
+  Tensor bias = makeInputTensor<DataType::FLOAT32>(b_shape, b_values, mm);
+  Tensor out = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams fc_params{};
+  fc_params.activation = Activation::RELU;
+
+  FullyConnected kernel(&in, &weights, &bias, &out, fc_params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/Utils.h"
+#include "PALGather.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `params` and `indices` as the kernel inputs and `output` as its single output;
+// `gparams` is forwarded to the KernelWithParams<GatherParams> base. The body is empty.
+Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
+ const GatherParams &gparams)
+ : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
+{
+}
+
+// Validates the operand types and the axis / batch_dims attributes, then resizes the output.
+//
+// Only FLOAT32 params (with a FLOAT32 output) are accepted; indices must be S32 or S64.
+// Negative `axis` is normalized against the rank of params, negative `batch_dims` against the
+// rank of indices. The output shape is the concatenation of
+//   params.dims[0 .. axis) + indices.dims[batch_dims .. rank) + params.dims(axis .. rank)
+// which has rank(params) + rank(indices) - 1 - batch_dims dimensions.
+void Gather::configure()
+{
+  if (params()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
+                         indices()->element_type() == DataType::S64);
+
+  // refer tensorflow/lite/kernels/gather.cc
+
+  // The reference declarator was mis-encoded as a pilcrow here; restored to `&params_shape`.
+  const Shape &params_shape = params()->shape();
+  const Shape &indices_shape = indices()->shape();
+
+  int axis = _params.axis;
+  if (axis < 0)
+  {
+    axis += params_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
+
+  int batch_dims = _params.batch_dims;
+  // batch_dims should be in range: [-rank(indices), rank(indices)].
+  // A negative batch_dims is offset by the rank of indices.
+  if (batch_dims < 0)
+  {
+    batch_dims += indices_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(batch_dims <= axis);
+  LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
+  LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
+  // The leading batch dimensions of params and indices must agree.
+  for (int i = 0; i < batch_dims; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
+  }
+
+  const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
+
+  Shape output_shape(num_dimensions);
+  int output_index = 0;
+  // 1) params dimensions before the gather axis
+  for (int i = 0; i < axis; ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  // 2) non-batch dimensions of indices
+  for (int i = batch_dims; i < indices_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = indices_shape.dim(i);
+  }
+  // 3) params dimensions after the gather axis
+  for (int i = axis + 1; i < params_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(output_shape);
+}
+
+// Dispatches on the element type of params. Only FLOAT32 is handled; any other type trips
+// the assertion.
+void Gather::execute() const
+{
+  if (params()->element_type() == DataType::FLOAT32)
+  {
+    evalFloat();
+    return;
+  }
+
+  assert(false && "Unsupported type.");
+}
+
+// Float gather. Copies axis and batch_dims from the kernel parameters into
+// tflite::GatherParams and calls the PAL Gather implementation, instantiated for int32_t
+// indices when the indices tensor is S32 and for int64_t indices otherwise.
+void Gather::evalFloat() const
+{
+  assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
+
+  tflite::GatherParams op_params;
+  op_params.axis = _params.axis;
+  op_params.batch_dims = _params.batch_dims;
+
+  const auto src = getTensorData<float>(params());
+  auto dst = getTensorData<float>(output());
+
+  if (indices()->element_type() != DataType::S32)
+  {
+    const auto idx = getTensorData<int64_t>(indices());
+
+    luci_interpreter_pal::Gather<float, int64_t>(op_params, getTensorShape(params()), src,
+                                                 getTensorShape(indices()), idx,
+                                                 getTensorShape(output()), dst);
+    return;
+  }
+
+  const auto idx = getTensorData<int32_t>(indices());
+
+  luci_interpreter_pal::Gather<float, int32_t>(op_params, getTensorShape(params()), src,
+                                               getTensorShape(indices()), idx,
+                                               getTensorShape(output()), dst);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `x` and `y` as the kernel inputs and `output` as its single output.
+Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+// Checks that both operands share one element type and that the output is BOOL. For U8
+// operands it also precomputes the multiplier/shift pair of each input scale. Finally the
+// output is resized to the broadcast of the two operand shapes.
+void Greater::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  const bool is_quantized = x()->element_type() == DataType::U8;
+  if (is_quantized)
+  {
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(x()->shape(), y()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Dispatches on the element type of `x`: FLOAT32, S64, S32 and U8 are handled; any other
+// type trips the assertion.
+void Greater::execute() const
+{
+  const auto type = x()->element_type();
+
+  if (type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (type == DataType::S64)
+  {
+    evalInteger<int64_t>();
+  }
+  else if (type == DataType::S32)
+  {
+    evalInteger<int32_t>();
+  }
+  else if (type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float comparison x > y. Uses the same-shape reference kernel when the operand shapes are
+// equal and the 4D broadcasting reference kernel otherwise.
+void Greater::evalFloat() const
+{
+  const auto lhs = getTensorData<float>(x());
+  const auto rhs = getTensorData<float>(y());
+  auto result = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (!op_params.is_broadcast)
+  {
+    tflite::reference_ops::Greater(op_params, getTensorShape(x()), lhs, getTensorShape(y()), rhs,
+                                   getTensorShape(output()), result);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), lhs,
+                                                getTensorShape(y()), rhs,
+                                                getTensorShape(output()), result);
+}
+
+// Integer comparison x > y for element type T, with no rescaling of the operands. Uses the
+// same-shape reference kernel when the operand shapes are equal and the 4D broadcasting
+// reference kernel otherwise.
+template <typename T> void Greater::evalInteger() const
+{
+  const auto lhs = getTensorData<T>(x());
+  const auto rhs = getTensorData<T>(y());
+  auto result = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (!op_params.is_broadcast)
+  {
+    tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), lhs,
+                                            getTensorShape(y()), rhs, getTensorShape(output()),
+                                            result);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), lhs,
+                                                         getTensorShape(y()), rhs,
+                                                         getTensorShape(output()), result);
+}
+
+// U8 comparison x > y. Each operand is described to the reference kernel by its negated zero
+// point plus the multiplier/shift pair stored in the kernel members, with a fixed left shift
+// of 8. Uses the same-shape reference kernel when the operand shapes are equal and the 4D
+// broadcasting reference kernel otherwise.
+void Greater::evalQuantized() const
+{
+  const auto lhs = getTensorData<uint8_t>(x());
+  const auto rhs = getTensorData<uint8_t>(y());
+  auto result = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+
+  // First operand; the offset is the NEGATED zero point.
+  op_params.input1_offset = -x()->zero_point();
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+
+  // Second operand; the offset is the NEGATED zero point.
+  op_params.input2_offset = -y()->zero_point();
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (!op_params.is_broadcast)
+  {
+    tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), lhs,
+                                              getTensorShape(y()), rhs, getTensorShape(output()),
+                                              result);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), lhs,
+                                                           getTensorShape(y()), rhs,
+                                                           getTensorShape(output()), result);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/GreaterEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `x` and `y` as the kernel inputs and `output` as its single output.
+GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output)
+ : Kernel({x, y}, {output})
+{
+}
+
+// Checks that both operands share one element type and that the output is BOOL. For U8
+// operands it also precomputes the multiplier/shift pair of each input scale. Finally the
+// output is resized to the broadcast of the two operand shapes.
+void GreaterEqual::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  const bool is_quantized = x()->element_type() == DataType::U8;
+  if (is_quantized)
+  {
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(x()->shape(), y()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Dispatches on the element type of `x`: FLOAT32, S64, S32 and U8 are handled; any other
+// type trips the assertion.
+void GreaterEqual::execute() const
+{
+  const auto type = x()->element_type();
+
+  if (type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (type == DataType::S64)
+  {
+    evalInteger<int64_t>();
+  }
+  else if (type == DataType::S32)
+  {
+    evalInteger<int32_t>();
+  }
+  else if (type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float comparison x >= y. Uses the same-shape reference kernel when the operand shapes are
+// equal and the 4D broadcasting reference kernel otherwise.
+void GreaterEqual::evalFloat() const
+{
+  const auto lhs = getTensorData<float>(x());
+  const auto rhs = getTensorData<float>(y());
+  auto result = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (!op_params.is_broadcast)
+  {
+    tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), lhs, getTensorShape(y()),
+                                        rhs, getTensorShape(output()), result);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), lhs,
+                                                     getTensorShape(y()), rhs,
+                                                     getTensorShape(output()), result);
+}
+
+// Integer comparison x >= y for element type T, with no rescaling of the operands. Uses the
+// same-shape reference kernel when the operand shapes are equal and the 4D broadcasting
+// reference kernel otherwise.
+template <typename T> void GreaterEqual::evalInteger() const
+{
+  const auto lhs = getTensorData<T>(x());
+  const auto rhs = getTensorData<T>(y());
+  auto result = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (!op_params.is_broadcast)
+  {
+    tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), lhs,
+                                                 getTensorShape(y()), rhs,
+                                                 getTensorShape(output()), result);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
+    op_params, getTensorShape(x()), lhs, getTensorShape(y()), rhs, getTensorShape(output()),
+    result);
+}
+
+// U8 comparison x >= y. Each operand is described to the reference kernel by its negated
+// zero point plus the multiplier/shift pair stored in the kernel members, with a fixed left
+// shift of 8. Uses the same-shape reference kernel when the operand shapes are equal and the
+// 4D broadcasting reference kernel otherwise.
+void GreaterEqual::evalQuantized() const
+{
+  const auto lhs = getTensorData<uint8_t>(x());
+  const auto rhs = getTensorData<uint8_t>(y());
+  auto result = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+
+  // First operand; the offset is the NEGATED zero point.
+  op_params.input1_offset = -x()->zero_point();
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+
+  // Second operand; the offset is the NEGATED zero point.
+  op_params.input2_offset = -y()->zero_point();
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (!op_params.is_broadcast)
+  {
+    tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), lhs,
+                                                   getTensorShape(y()), rhs,
+                                                   getTensorShape(output()), result);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
+    op_params, getTensorShape(x()), lhs, getTensorShape(y()), rhs, getTensorShape(output()),
+    result);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/InstanceNorm.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/common.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `input`, `gamma` and `beta` as the kernel inputs and `output` as its single
+// output; `params` is forwarded to the KernelWithParams<InstanceNormParams> base.
+// The reference declarator of the last parameter was mis-encoded as a pilcrow; it is
+// restored to `&params` here.
+InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta,
+                           Tensor *output, const InstanceNormParams &params)
+  : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
+{
+}
+
+// Validates the operands and resizes the output to the input shape.
+// Requirements checked below:
+//  - input is rank 4 and has the same element type as output, gamma and beta;
+//  - gamma and beta are rank 1, with either one element or as many elements as the last
+//    input dimension.
+void InstanceNorm::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) ||
+ gamma()->shape().dim(0) == 1);
+ LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) ||
+ beta()->shape().dim(0) == 1);
+ // TODO: enable it only if kernel with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+// Dispatches on the input element type. Only FLOAT32 is handled; any other type trips the
+// assertion.
+void InstanceNorm::execute() const
+{
+  if (input()->element_type() == DataType::FLOAT32)
+  {
+    evalFloat();
+    return;
+  }
+
+  assert(false && "Unsupported type.");
+}
+
+// Float instance normalization.
+//
+// For every (batch, channel) pair the mean and variance are computed over all
+// height x width positions, and each element is mapped to
+//   (x - mean) * gamma / sqrt(var + epsilon) + beta
+// followed by clamping to the activation range of the kernel parameters. gamma and beta are
+// either a single shared value or one value per channel. Accumulation is done in double.
+//
+// Changes relative to the previous version:
+//  - the input buffer is indexed with the input shape (it used the output shape, which was
+//    only correct because the dimensions are checked to match);
+//  - the variance, computed as E[x^2] - mean^2, is clamped at zero, since rounding can make
+//    it slightly negative and a negative value larger than epsilon would give NaN in sqrt.
+void InstanceNorm::evalFloat() const
+{
+  float activation_min, activation_max;
+  calculateActivationRange(params().activation, &activation_min, &activation_max);
+  auto input_shape = getTensorShape(input());
+  auto output_shape = getTensorShape(output());
+  const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+  const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
+  const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
+  const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+  const float *input_data = getTensorData<float>(input());
+  const float *gamma_data = getTensorData<float>(gamma());
+  auto gamma_shape = getTensorShape(gamma());
+  bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1;
+  const float *beta_data = getTensorData<float>(beta());
+  auto beta_shape = getTensorShape(beta());
+  bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1;
+  float *output_data = getTensorData<float>(output());
+  for (int32_t batch = 0; batch < batches; batch++)
+  {
+    for (int32_t channel = 0; channel < channels; channel++)
+    {
+      // Pass 1: accumulate sum and sum of squares over the spatial positions.
+      double sum = 0.0f;
+      double square_sum = 0.0f;
+      int32_t size = heights * widths;
+      for (int32_t height = 0; height < heights; height++)
+      {
+        for (int32_t width = 0; width < widths; width++)
+        {
+          double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)];
+          sum += input_val;
+          square_sum += (input_val * input_val);
+        }
+      }
+      double mean = sum / size;
+      double var = square_sum / size - mean * mean;
+      // Variance is non-negative by definition; remove negative rounding residue.
+      if (var < 0.0)
+      {
+        var = 0.0;
+      }
+
+      double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
+      double beta = single_beta ? beta_data[0] : beta_data[channel];
+      // Fold the normalization into one multiply-add per element: y = x * a + b.
+      double a = gamma / (std::sqrt(var + params().epsilon));
+      double b = -mean * a + beta;
+
+      // Pass 2: apply the affine transform and the activation clamp.
+      for (int32_t height = 0; height < heights; height++)
+      {
+        for (int32_t width = 0; width < widths; width++)
+        {
+          double input_value =
+            input_data[tflite::Offset(input_shape, batch, height, width, channel)];
+          double output_value = input_value * a + b;
+          output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
+            tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
+                                                 activation_max);
+        }
+      }
+    }
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelBuilder.h"
+#include "Builders.h"
+
+namespace luci_interpreter
+{
+
+// Populates the configure-function table.
+// REGISTER_KERNEL(op, name) is defined locally to call register_kernel_configure with
+// circle::BuiltinOperator_<op> and configure_kernel_Circle<name>; the kernel list file is
+// then included inside the constructor body and the macro is undefined again afterwards.
+// NOTE(review): the .lst files are presumably sequences of REGISTER_KERNEL(...) entries;
+// they are not visible here.
+KernelConfigureRegistry::KernelConfigureRegistry()
+{
+#define REGISTER_KERNEL(builtin_operator, name) \
+ register_kernel_configure(circle::BuiltinOperator::BuiltinOperator_##builtin_operator, \
+ configure_kernel_Circle##name);
+
+// USE_GENERATED_LIST selects between the generated kernel list and the default one.
+#if USE_GENERATED_LIST
+#include "GeneratedKernelsToBuild.lst"
+#else
+#include "KernelsToBuild.lst"
+#endif
+
+#undef REGISTER_KERNEL
+}
+
+// Looks up the configure function registered for `opcode` and invokes it with the operator
+// and the runtime graph. A null lookup result trips the assertion.
+void KernelConfigureRegistry::configure_kernel(const circle::Operator *cur_op,
+                                               circle::BuiltinOperator opcode,
+                                               BaseRuntimeGraph *runtime_graph)
+{
+  KernelConfigureFunc *const configure_func = get_kernel_configure_func(opcode);
+  if (configure_func == nullptr)
+  {
+    assert(false && "Unsupported operator");
+  }
+
+  configure_func(cur_op, runtime_graph);
+}
+
+// Populates the execute-function table.
+// REGISTER_KERNEL(op, name) is defined locally to call register_kernel_execute with
+// circle::BuiltinOperator_<op> and execute_kernel_Circle<name>; the kernel list file is then
+// included inside the constructor body and the macro is undefined again afterwards.
+// NOTE(review): the .lst files are presumably sequences of REGISTER_KERNEL(...) entries;
+// they are not visible here.
+KernelExecuteRegistry::KernelExecuteRegistry()
+{
+#define REGISTER_KERNEL(builtin_operator, name) \
+ register_kernel_execute(circle::BuiltinOperator::BuiltinOperator_##builtin_operator, \
+ execute_kernel_Circle##name);
+
+// USE_GENERATED_LIST selects between the generated kernel list and the default one.
+#if USE_GENERATED_LIST
+#include "GeneratedKernelsToBuild.lst"
+#else
+#include "KernelsToBuild.lst"
+#endif
+
+#undef REGISTER_KERNEL
+}
+
+// Looks up the execute function registered for `opcode` and invokes it with the operator,
+// the runtime graph and the in-place flag. A null lookup result trips the assertion.
+void KernelExecuteRegistry::execute_kernel(const circle::Operator *cur_op,
+                                           circle::BuiltinOperator opcode,
+                                           BaseRuntimeGraph *runtime_graph, bool is_inplace)
+{
+  KernelExecuteFunc *const execute_func = get_kernel_execute_func(opcode);
+  if (execute_func == nullptr)
+  {
+    assert(false && "Unsupported operator");
+  }
+
+  execute_func(cur_op, runtime_graph, is_inplace);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNEL_KERNELBUILDER_H
+#define LUCI_INTERPRETER_KERNEL_KERNELBUILDER_H
+
+#include "core/RuntimeGraph.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <memory>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+// Compile-time selection of the runtime graph type used by the registries below:
+// StaticRuntimeGraph when USE_STATIC_ALLOC is defined, RuntimeGraph otherwise.
+// The alias lives in an unnamed namespace.
+namespace
+{
+#ifdef USE_STATIC_ALLOC
+using BaseRuntimeGraph = StaticRuntimeGraph;
+#else
+using BaseRuntimeGraph = RuntimeGraph;
+#endif
+} // namespace
+
+// Table mapping a circle builtin operator code to the function that configures that kernel.
+// The table is filled by the constructor and queried through configure_kernel().
+class KernelConfigureRegistry
+{
+public:
+  // Signature shared by all kernel configure functions.
+  using KernelConfigureFunc = void(const circle::Operator *, BaseRuntimeGraph *);
+
+  KernelConfigureRegistry();
+
+  // Runs the configure function registered for `opcode` on `cur_op`.
+  void configure_kernel(const circle::Operator *cur_op, circle::BuiltinOperator opcode,
+                        BaseRuntimeGraph *runtime_graph);
+
+private:
+  // Returns the function registered for `opcode`.
+  // std::unordered_map::at throws std::out_of_range when no entry exists for the key.
+  KernelConfigureFunc *get_kernel_configure_func(circle::BuiltinOperator opcode) const
+  {
+    const auto key = static_cast<size_t>(opcode);
+    return _operator_configure.at(key);
+  }
+
+  // Stores `func` under `id`, replacing any function previously stored for that operator.
+  void register_kernel_configure(circle::BuiltinOperator id, KernelConfigureFunc *func)
+  {
+    const auto key = static_cast<size_t>(id);
+    _operator_configure[key] = func;
+  }
+
+private:
+  // Operator code -> configure function.
+  std::unordered_map<int32_t, KernelConfigureFunc *> _operator_configure;
+};
+
+// Table mapping a circle builtin operator code to the function that executes that kernel.
+// The table is filled by the constructor and queried through execute_kernel().
+class KernelExecuteRegistry
+{
+public:
+  // Signature shared by all kernel execute functions; the bool is the in-place flag passed
+  // through by execute_kernel().
+  using KernelExecuteFunc = void(const circle::Operator *, BaseRuntimeGraph *, bool);
+
+  KernelExecuteRegistry();
+
+  // Runs the execute function registered for `opcode` on `cur_op`.
+  void execute_kernel(const circle::Operator *cur_op, circle::BuiltinOperator opcode,
+                      BaseRuntimeGraph *runtime_graph, bool is_inplace);
+
+private:
+  // Returns the function registered for `opcode`.
+  // std::unordered_map::at throws std::out_of_range when no entry exists for the key.
+  KernelExecuteFunc *get_kernel_execute_func(circle::BuiltinOperator opcode) const
+  {
+    const auto key = static_cast<size_t>(opcode);
+    return _operator_execute.at(key);
+  }
+
+  // Stores `func` under `id`, replacing any function previously stored for that operator.
+  void register_kernel_execute(circle::BuiltinOperator id, KernelExecuteFunc *func)
+  {
+    const auto key = static_cast<size_t>(id);
+    _operator_execute[key] = func;
+  }
+
+private:
+  // Operator code -> execute function.
+  std::unordered_map<int32_t, KernelExecuteFunc *> _operator_execute;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNEL_KERNELBUILDER_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Normalize.h"
+#include "kernels/Utils.h"
+
+#include "PALL2Normalize.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `input` as the kernel input and `output` as its single output; `params` is
+// forwarded to the KernelWithParams<L2NormParams> base.
+// The reference declarator of the last parameter was mis-encoded as a pilcrow; it is
+// restored to `&params` here.
+L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
+  : KernelWithParams<L2NormParams>({input}, {output}, params)
+{
+}
+
+// Validates the operands and resizes the output to the input shape.
+// Requirements checked below:
+//  - input rank is at most 4;
+//  - output is FLOAT32 or U8 and input has the same element type;
+//  - a U8 output must have scale 1/128 and zero point 128;
+//  - no fused activation is allowed.
+void L2Normalize::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+ output()->element_type() == DataType::U8);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (output()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.));
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 128);
+ }
+ LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE);
+ // TODO: enable it only if kernel with dynamic shapes
+ output()->resize(input()->shape());
+}
+
+// Dispatches on the output element type: FLOAT32 runs eval<float> with a zero point of 0,
+// U8 runs eval<uint8_t> with the input zero point; any other type trips the assertion.
+void L2Normalize::execute() const
+{
+  const auto type = output()->element_type();
+
+  if (type == DataType::FLOAT32)
+  {
+    eval<float>(0);
+  }
+  else if (type == DataType::U8)
+  {
+    eval<uint8_t>(input()->zero_point());
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Runs the PAL L2Normalization routine for element type T. `zero_point` is passed to it as
+// the input zero point; the remaining operator parameters are value-initialized.
+template <typename T> void L2Normalize::eval(int32_t zero_point) const
+{
+  tflite::L2NormalizationParams op_params{};
+  op_params.input_zero_point = zero_point;
+
+  const auto in_shape = getTensorShape(input());
+  const auto in_data = getTensorData<T>(input());
+  const auto out_shape = getTensorShape(output());
+  auto out_data = getTensorData<T>(output());
+
+  luci_interpreter_pal::L2Normalization(op_params, in_shape, in_data, out_shape, out_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALL2Pool2D.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Builds the kernel from one input tensor, one output tensor and the pooling
+// parameters; everything is forwarded to the KernelWithParams base.
+L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+  : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+// Validates the input, derives the pooled output size and the padding amounts
+// (cached in _padding_width / _padding_height), and resizes the output tensor.
+void L2Pool2D::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  // Dimensions 0..3 are used as batch, height, width and channel counts.
+  const auto &in_shape = input()->shape();
+  const int num_batches = in_shape.dim(0);
+  const int in_height = in_shape.dim(1);
+  const int in_width = in_shape.dim(2);
+  const int num_channels = in_shape.dim(3);
+
+  // Matching GetWindowedOutputSize in TensorFlow.
+  const auto padding_type = params().padding;
+  const int out_width =
+    computeOutputSize(padding_type, in_width, params().filter_width, params().stride_width, 1);
+  const int out_height =
+    computeOutputSize(padding_type, in_height, params().filter_height, params().stride_height, 1);
+
+  // Padding is computed with a dilation factor of 1 in both directions.
+  _padding_width =
+    computePadding(params().stride_width, 1, in_width, params().filter_width, out_width);
+  _padding_height =
+    computePadding(params().stride_height, 1, in_height, params().filter_height, out_height);
+
+  LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize({num_batches, out_height, out_width, num_channels});
+}
+
+// Runs L2 pooling through the PAL routine. Only FLOAT32 input is handled; any
+// other element type reaches the assertion in the default branch.
+void L2Pool2D::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      // The case body has its own scope so that the locals declared here are not
+      // in scope at (and not jumped over by) the default label.
+      float activation_min{};
+      float activation_max{};
+      calculateActivationRange(params().activation, &activation_min, &activation_max);
+
+      // Value-initialized so members that are not assigned below are zero rather
+      // than indeterminate.
+      tflite::PoolParams op_params{};
+      op_params.stride_height = params().stride_height;
+      op_params.stride_width = params().stride_width;
+      op_params.filter_height = params().filter_height;
+      op_params.filter_width = params().filter_width;
+      // Padding amounts were computed in configure().
+      op_params.padding_values.height = _padding_height;
+      op_params.padding_values.width = _padding_width;
+      op_params.float_activation_min = activation_min;
+      op_params.float_activation_max = activation_max;
+      luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()),
+                                   getTensorData<float>(input()), getTensorShape(output()),
+                                   getTensorData<float>(output()));
+      break;
+    }
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LeakyRelu.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+#include "PALLeakyRelu.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Builds the kernel from one input tensor, one output tensor and the LeakyRelu
+// parameters; everything is forwarded to the KernelWithParams base.
+LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params)
+  : KernelWithParams<LeakyReluParams>({input}, {output}, params)
+{
+}
+
+// Checks that input and output share an element type, precomputes the fixed-point
+// multipliers for U8 tensors and gives the output the input's shape.
+void LeakyRelu::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  const bool is_quantized = (input()->element_type() == DataType::U8);
+  if (is_quantized)
+  {
+    // `auto` keeps the scales in their native type so the arithmetic below is
+    // carried out exactly as if the accessors were called inline.
+    const auto input_scale = input()->scale();
+    const auto output_scale = output()->scale();
+
+    // Multiplier for the alpha-scaled branch.
+    const double alpha_multiplier = input_scale * params().alpha / output_scale;
+    quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
+
+    // Multiplier for the identity branch.
+    const double identity_multiplier = input_scale / output_scale;
+    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+// Runs the kernel, choosing the float or quantized path from the input element type.
+void LeakyRelu::execute() const
+{
+  const auto input_type = input()->element_type();
+  if (input_type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (input_type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float path: copies alpha into the TFLite parameter struct and passes the
+// input/output shapes and buffers to the PAL LeakyRelu routine.
+void LeakyRelu::evalFloat() const
+{
+  const auto input_shape = getTensorShape(input());
+  const auto input_data = getTensorData<float>(input());
+  const auto output_shape = getTensorShape(output());
+  auto output_data = getTensorData<float>(output());
+
+  tflite::LeakyReluParams relu_params{};
+  relu_params.alpha = params().alpha;
+
+  luci_interpreter_pal::LeakyRelu(relu_params, input_shape, input_data, output_shape, output_data);
+}
+
+// U8 path: fills the TFLite parameter struct with the tensor zero points and the
+// multipliers/shifts computed in configure(), then calls the TFLite reference
+// QuantizeLeakyRelu op.
+void LeakyRelu::evalQuantized() const
+{
+  tflite::LeakyReluParams quant_params{};
+  quant_params.input_offset = input()->zero_point();
+  quant_params.output_offset = output()->zero_point();
+  quant_params.output_multiplier_alpha = _output_multiplier_alpha;
+  quant_params.output_shift_alpha = _output_shift_alpha;
+  quant_params.output_multiplier_identity = _output_multiplier_identity;
+  quant_params.output_shift_identity = _output_shift_identity;
+
+  const auto input_shape = getTensorShape(input());
+  const auto input_data = getTensorData<uint8_t>(input());
+  const auto output_shape = getTensorShape(output());
+  auto output_data = getTensorData<uint8_t>(output());
+
+  tflite::reference_ops::QuantizeLeakyRelu(quant_params, input_shape, input_data, output_shape,
+                                           output_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Less.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers x and y as the kernel inputs and `output` as its single output.
+Less::Less(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+// Validates operand/output types, precomputes the quantization multipliers for
+// U8 operands and resizes the output to the broadcast shape of x and y.
+void Less::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  const bool is_quantized = (x()->element_type() == DataType::U8);
+  if (is_quantized)
+  {
+    // Each operand gets its own multiplier/shift pair derived from its scale.
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(x()->shape(), y()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Runs the comparison, choosing the evaluation routine from the element type of x.
+void Less::execute() const
+{
+  const auto operand_type = x()->element_type();
+  if (operand_type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (operand_type == DataType::S64)
+  {
+    evalInteger<int64_t>();
+  }
+  else if (operand_type == DataType::S32)
+  {
+    evalInteger<int32_t>();
+  }
+  else if (operand_type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float path: calls the TFLite reference Less op, using the 4D broadcasting
+// variant when the shapes of x and y differ.
+void Less::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  // Value-initialized so the quantization members, which this path never
+  // assigns, are zero rather than indeterminate.
+  tflite::ComparisonParams op_params{};
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), x_data,
+                                               getTensorShape(y()), y_data,
+                                               getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::Less(op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
+                                getTensorShape(output()), output_data);
+  }
+}
+
+// Integer path for element type T: calls the TFLite reference LessNoScaling op,
+// using the 4D broadcasting variant when the shapes of x and y differ.
+template <typename T> void Less::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  // Value-initialized so the quantization members, which this path never
+  // assigns, are zero rather than indeterminate.
+  tflite::ComparisonParams op_params{};
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data,
+                                                        getTensorShape(y()), y_data,
+                                                        getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data,
+                                         getTensorShape(y()), y_data, getTensorShape(output()),
+                                         output_data);
+  }
+}
+
+// U8 path: fills the comparison parameters from the tensor zero points and the
+// multipliers/shifts computed in configure(), then calls the TFLite reference
+// LessWithScaling op (4D broadcasting variant when the shapes differ).
+void Less::evalQuantized() const
+{
+  const auto lhs_shape = getTensorShape(x());
+  const auto rhs_shape = getTensorShape(y());
+  const auto result_shape = getTensorShape(output());
+
+  const auto lhs_data = getTensorData<uint8_t>(x());
+  const auto rhs_data = getTensorData<uint8_t>(y());
+  auto result_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams cmp_params;
+  cmp_params.left_shift = 8;
+  cmp_params.input1_offset = -x()->zero_point(); // Note the '-'
+  cmp_params.input1_shift = _x_shift;
+  cmp_params.input1_multiplier = _x_multiplier;
+  cmp_params.input2_offset = -y()->zero_point(); // Note the '-'
+  cmp_params.input2_shift = _y_shift;
+  cmp_params.input2_multiplier = _y_multiplier;
+  cmp_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (!cmp_params.is_broadcast)
+  {
+    tflite::reference_ops::LessWithScaling(cmp_params, lhs_shape, lhs_data, rhs_shape, rhs_data,
+                                           result_shape, result_data);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowLessWithScaling(cmp_params, lhs_shape, lhs_data, rhs_shape,
+                                                        rhs_data, result_shape, result_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LessEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers x and y as the kernel inputs and `output` as its single output.
+LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+// Validates operand/output types, precomputes the quantization multipliers for
+// U8 operands and resizes the output to the broadcast shape of x and y.
+void LessEqual::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  const bool is_quantized = (x()->element_type() == DataType::U8);
+  if (is_quantized)
+  {
+    // Each operand gets its own multiplier/shift pair derived from its scale.
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(x()->shape(), y()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Runs the comparison, choosing the evaluation routine from the element type of x.
+void LessEqual::execute() const
+{
+  const auto operand_type = x()->element_type();
+  if (operand_type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (operand_type == DataType::S64)
+  {
+    evalInteger<int64_t>();
+  }
+  else if (operand_type == DataType::S32)
+  {
+    evalInteger<int32_t>();
+  }
+  else if (operand_type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float path: calls the TFLite reference LessEqual op, using the 4D broadcasting
+// variant when the shapes of x and y differ.
+void LessEqual::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  // Value-initialized so the quantization members, which this path never
+  // assigns, are zero rather than indeterminate.
+  tflite::ComparisonParams op_params{};
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), x_data,
+                                                    getTensorShape(y()), y_data,
+                                                    getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                     y_data, getTensorShape(output()), output_data);
+  }
+}
+
+// Integer path for element type T: calls the TFLite reference LessEqualNoScaling
+// op, using the 4D broadcasting variant when the shapes of x and y differ.
+template <typename T> void LessEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  // Value-initialized so the quantization members, which this path never
+  // assigns, are zero rather than indeterminate.
+  tflite::ComparisonParams op_params{};
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                             getTensorShape(y()), y_data,
+                                                             getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                              getTensorShape(y()), y_data, getTensorShape(output()),
+                                              output_data);
+  }
+}
+
+// U8 path: fills the comparison parameters from the tensor zero points and the
+// multipliers/shifts computed in configure(), then calls the TFLite reference
+// LessEqualWithScaling op (4D broadcasting variant when the shapes differ).
+void LessEqual::evalQuantized() const
+{
+  const auto lhs_shape = getTensorShape(x());
+  const auto rhs_shape = getTensorShape(y());
+  const auto result_shape = getTensorShape(output());
+
+  const auto lhs_data = getTensorData<uint8_t>(x());
+  const auto rhs_data = getTensorData<uint8_t>(y());
+  auto result_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams cmp_params;
+  cmp_params.left_shift = 8;
+  cmp_params.input1_offset = -x()->zero_point(); // Note the '-'
+  cmp_params.input1_shift = _x_shift;
+  cmp_params.input1_multiplier = _x_multiplier;
+  cmp_params.input2_offset = -y()->zero_point(); // Note the '-'
+  cmp_params.input2_shift = _y_shift;
+  cmp_params.input2_multiplier = _y_multiplier;
+  cmp_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (!cmp_params.is_broadcast)
+  {
+    tflite::reference_ops::LessEqualWithScaling(cmp_params, lhs_shape, lhs_data, rhs_shape,
+                                                rhs_data, result_shape, result_data);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling(
+    cmp_params, lhs_shape, lhs_data, rhs_shape, rhs_data, result_shape, result_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+
+#include "kernels/Utils.h"
+
+#include "PALLocalResponseNormalization.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Builds the kernel from one input tensor, one output tensor and the operator
+// parameters; everything is forwarded to the KernelWithParams base.
+LocalResponseNormalization::LocalResponseNormalization(
+  const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
+  : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
+{
+}
+
+// Accepts only 4-dimensional FLOAT32 tensors whose input and output types match,
+// then gives the output the input's shape.
+void LocalResponseNormalization::configure()
+{
+  // Rank check.
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+
+  // Type checks: FLOAT32 output, and the input type must equal the output type.
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto &input_shape = input()->shape();
+  output()->resize(input_shape);
+}
+
+// Runs local response normalization through the PAL routine. Only FLOAT32
+// output is handled; any other element type reaches the assertion in the
+// default branch.
+void LocalResponseNormalization::execute() const
+{
+  switch (output()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      // The case body has its own scope so that op_params is not in scope at
+      // (and not jumped over by) the default label. It is value-initialized so
+      // members that are not assigned below are zero rather than indeterminate.
+      tflite::LocalResponseNormalizationParams op_params{};
+      op_params.range = params().radius;
+      op_params.bias = params().bias;
+      op_params.alpha = params().alpha;
+      op_params.beta = params().beta;
+      luci_interpreter_pal::LocalResponseNormalization(
+        op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
+        getTensorData<float>(output()));
+      break;
+    }
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/log_softmax.h>
+
+#include "PALLogSoftmax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `input` as the only kernel input and `output` as the only output.
+LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output)
+  : Kernel({input}, {output})
+{
+}
+
+// Checks that input and output share an element type. For U8 tensors it also
+// validates the output quantization and fills the member lookup table `_table`
+// through the PAL helper. Finally the output takes the input's shape.
+void LogSoftmax::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  if (input()->element_type() == DataType::U8)
+  {
+    // A U8 output is required to use scale 16/256 and zero point 255.
+    LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256);
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == 255);
+
+    tflite::SoftmaxParams params{};
+
+    // The helper writes through params.table, i.e. into _table.
+    params.table = _table;
+    params.beta = 1.0;
+    luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
+  }
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+// Runs the kernel, choosing the float or quantized path from the input element type.
+void LogSoftmax::execute() const
+{
+  const auto input_type = input()->element_type();
+  if (input_type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (input_type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float path: calls the TFLite reference LogSoftmax op with value-initialized
+// softmax parameters.
+void LogSoftmax::evalFloat() const
+{
+  const auto input_shape = getTensorShape(input());
+  const auto input_data = getTensorData<float>(input());
+  const auto output_shape = getTensorShape(output());
+  auto output_data = getTensorData<float>(output());
+
+  tflite::SoftmaxParams softmax_params{};
+  tflite::reference_ops::LogSoftmax(softmax_params, input_shape, input_data, output_shape,
+                                    output_data);
+}
+
+// U8 path: points the softmax parameters at the member lookup table, copies the
+// output quantization into them, lets the PAL helper finish the setup and then
+// runs the PAL LogSoftmax routine.
+void LogSoftmax::evalQuantized() const
+{
+  const auto input_shape = getTensorShape(input());
+  const auto output_shape = getTensorShape(output());
+  const auto input_scale = input()->scale();
+  uint8_t *output_data = getTensorData<uint8_t>(output());
+  const uint8_t *input_data = getTensorData<uint8_t>(input());
+  const float beta = 1.0;
+
+  tflite::SoftmaxParams params{};
+
+  // _table is const inside this const method, while params.table is a non-const
+  // pointer, hence the cast.
+  params.table = const_cast<float *>(_table);
+  params.zero_point = output()->zero_point();
+  params.scale = output()->scale();
+
+  luci_interpreter_pal::InitializeParams(&params, input_scale, beta);
+  luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+                                   output_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalAnd.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers input1 and input2 as the kernel inputs and `output` as its single output.
+LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output)
+  : Kernel({input1, input2}, {output})
+{
+}
+
+// Requires both inputs and the output to share one element type, then resizes
+// the output to the broadcast shape of the two inputs.
+void LogicalAnd::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(input1()->shape(), input2()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Runs the kernel. Only BOOL inputs are handled; anything else trips the assertion.
+void LogicalAnd::execute() const
+{
+  const bool is_bool_input = (input1()->element_type() == DataType::BOOL);
+  if (is_bool_input)
+  {
+    evalLogicalAnd();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Applies `x && y` to the boolean inputs through BinaryOpBroadcastSlow, writing
+// into the boolean output.
+inline void LogicalAnd::evalLogicalAnd() const
+{
+  const auto lhs_shape = getTensorShape(input1());
+  const auto lhs_data = getTensorData<bool>(input1());
+  const auto rhs_shape = getTensorShape(input2());
+  const auto rhs_data = getTensorData<bool>(input2());
+  const auto result_shape = getTensorShape(output());
+  auto result_data = getTensorData<bool>(output());
+
+  BinaryOpBroadcastSlow(lhs_shape, lhs_data, rhs_shape, rhs_data, result_shape, result_data,
+                        [](bool x, bool y) { return x && y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `input` as the only kernel input and `output` as the only output.
+LogicalNot::LogicalNot(const Tensor *input, Tensor *output)
+  : Kernel({input}, {output})
+{
+}
+
+// Requires input and output to share an element type and gives the output the
+// input's shape.
+void LogicalNot::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto &input_shape = input()->shape();
+  output()->resize(input_shape);
+}
+
+// Runs the kernel. Only BOOL input is handled; anything else trips the assertion.
+void LogicalNot::execute() const
+{
+  const bool is_bool_input = (input()->element_type() == DataType::BOOL);
+  if (is_bool_input)
+  {
+    evalLogicalNot();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Writes the negation of every input element to the output. The element count
+// comes from tflite::MatchingFlatSize over the input and output shapes.
+inline void LogicalNot::evalLogicalNot() const
+{
+  const auto input_shape = getTensorShape(input());
+  const auto output_shape = getTensorShape(output());
+  const int num_elements = tflite::MatchingFlatSize(input_shape, output_shape);
+
+  bool *dst = getTensorData<bool>(output());
+  const bool *src = getTensorData<bool>(input());
+
+  int idx = 0;
+  while (idx < num_elements)
+  {
+    dst[idx] = !src[idx];
+    ++idx;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalOr.h"
+
+#include "kernels/Utils.h"
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers input1 and input2 as the kernel inputs and `output` as its single output.
+LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output)
+  : Kernel({input1, input2}, {output})
+{
+}
+
+// Requires both inputs to be BOOL tensors of the same type, then resizes the
+// output to the broadcast shape of the two inputs.
+void LogicalOr::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == DataType::BOOL);
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(input1()->shape(), input2()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Applies `x || y` to the boolean inputs through BinaryOpBroadcastSlow, writing
+// into the boolean output.
+void LogicalOr::execute() const
+{
+  const auto lhs_shape = getTensorShape(input1());
+  const auto lhs_data = getTensorData<bool>(input1());
+  const auto rhs_shape = getTensorShape(input2());
+  const auto rhs_data = getTensorData<bool>(input2());
+  const auto result_shape = getTensorShape(output());
+  auto result_data = getTensorData<bool>(output());
+
+  BinaryOpBroadcastSlow(lhs_shape, lhs_data, rhs_shape, rhs_data, result_shape, result_data,
+                        [](bool x, bool y) { return x || y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/logistic.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+#ifndef DIS_FLOAT
+// Float Logistic: runs the TFLite reference op on the tensor buffers obtained
+// from the runtime graph. With `is_inplace` the result is written over the input
+// buffer and the graph is told about it via makeInplaceOperation().
+void evalFloat(const circle::Tensor *input, const circle::Tensor *output, bool is_inplace,
+               BaseRuntimeGraph *runtime_graph)
+{
+  const float *input_data = reinterpret_cast<const float *>(runtime_graph->getDataByTensor(input));
+  // The output buffer is always looked up, even if the in-place path replaces it.
+  float *const own_output_data =
+    reinterpret_cast<float *>(runtime_graph->getDataByTensor(output));
+  float *output_data = is_inplace ? const_cast<float *>(input_data) : own_output_data;
+
+  assert(input_data != nullptr);
+  assert(output_data != nullptr);
+
+  const auto input_shape = kernels::getTensorShape(input);
+  const auto output_shape = kernels::getTensorShape(output);
+  tflite::reference_ops::Logistic(input_shape, input_data, output_shape, output_data);
+
+  if (!is_inplace)
+    return;
+
+  runtime_graph->makeInplaceOperation(input, output);
+}
+#endif // DIS_FLOAT
+
+#ifndef DIS_QUANT
+// int8 Logistic: runs the TFLite reference op on the tensor buffers obtained
+// from the runtime graph, passing the input/output scales and zero points. With
+// `is_inplace` the result is written over the input buffer and the graph is told
+// about it via makeInplaceOperation().
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *output, bool is_inplace,
+                   BaseRuntimeGraph *runtime_graph)
+{
+  const int8_t *input_data =
+    reinterpret_cast<const int8_t *>(runtime_graph->getDataByTensor(input));
+  int8_t *output_data = reinterpret_cast<int8_t *>(runtime_graph->getDataByTensor(output));
+  if (is_inplace)
+    output_data = const_cast<int8_t *>(input_data);
+
+  // Same buffer sanity checks as the float path performs before calling the op.
+  assert(input_data != nullptr);
+  assert(output_data != nullptr);
+
+  tflite::reference_ops::Logistic(kernels::getTensorShape(input), input_data, Tensor::scale(input),
+                                  Tensor::zero_point(input), kernels::getTensorShape(output),
+                                  output_data, Tensor::scale(output), Tensor::zero_point(output));
+  if (is_inplace)
+  {
+    runtime_graph->makeInplaceOperation(input, output);
+  }
+}
+#endif // DIS_QUANT
+
+} // namespace
+
+// Configuration step for the Logistic operator: looks up the first input and
+// first output tensor of `cur_op` in `runtime_graph` and verifies that both
+// have the same element type.
+void configure_kernel_CircleLogistic(const circle::Operator *cur_op,
+ BaseRuntimeGraph *runtime_graph)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ // -1 is treated as an invalid tensor index.
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+
+#ifndef DIS_QUANT
+ // NOTE(review): this scale check applies to U8 input, while
+ // execute_kernel_CircleLogistic only dispatches FLOAT32 and S8 - confirm
+ // which quantized type is intended here.
+ if (Tensor::element_type(input) == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(Tensor::scale(output) == 1. / 256);
+ }
+#endif // DIS_QUANT
+}
+
+// Execution step for the Logistic operator: looks up the first input and first
+// output tensor of `cur_op` in `runtime_graph` and dispatches on the input
+// element type. `is_inplace` is forwarded to the evaluation routine.
+void execute_kernel_CircleLogistic(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+ bool is_inplace)
+{
+ const auto input_index = cur_op->inputs()->operator[](0);
+ const auto output_index = cur_op->outputs()->operator[](0);
+
+ // -1 is treated as an invalid tensor index.
+ assert(input_index != -1);
+ assert(output_index != -1);
+
+ const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+ auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ // Each case is compiled only when its DIS_FLOAT / DIS_QUANT macro is not
+ // defined; types without a compiled case fall through to the assertion.
+ switch (Tensor::element_type(input))
+ {
+#ifndef DIS_FLOAT
+ case DataType::FLOAT32:
+ evalFloat(input, output, is_inplace, runtime_graph);
+ break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+ case DataType::S8:
+ evalQuantized(input, output, is_inplace, runtime_graph);
+ break;
+#endif // DIS_QUANT
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// TODO enable it
+#if 0
+#include "kernels/Logistic.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Generic check: builds an input tensor of element type T from `input_data`,
+// runs the Logistic kernel and compares the output data and shape against
+// `output_data` and `output_shape`.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(getElementType<T>());
+
+  // Configure first, then allocate the output buffer, then run.
+  Logistic kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  const auto actual_data = extractTensorData<float>(output_tensor);
+  EXPECT_THAT(actual_data, FloatArrayNear(output_data));
+
+  const auto actual_shape = extractTensorShape(output_tensor);
+  EXPECT_THAT(actual_shape, ::testing::ElementsAreArray(output_shape));
+}
+
+// U8 specialization: quantizes the input using parameters derived from the
+// minimum and maximum of `input_data`, runs the Logistic kernel with an output of
+// scale 1/256 and zero point 0, and compares the dequantized output within a
+// tolerance of twice the output scale.
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+                    std::initializer_list<int32_t> output_shape,
+                    std::initializer_list<float> input_data,
+                    std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  const std::pair<float, int32_t> input_quant_param =
+    quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+  const float input_scale = input_quant_param.first;
+  const int32_t input_zero_point = input_quant_param.second;
+
+  Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_scale, input_zero_point,
+                                                      input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+  // Configure first, then allocate the output buffer, then run.
+  Logistic kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  const auto actual_data = dequantizeTensorData(output_tensor);
+  EXPECT_THAT(actual_data, FloatArrayNear(output_data, output_tensor.scale() * 2));
+
+  const auto actual_shape = extractTensorShape(output_tensor);
+  EXPECT_THAT(actual_shape, ::testing::ElementsAreArray(output_shape));
+}
+
+// Empty typed-test fixture; the type parameter selects the element type under test.
+template <typename T> class LogisticTest : public ::testing::Test {};
+
+// The typed suite is instantiated for float and uint8_t.
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(LogisticTest, DataTypes);
+
+// Feeds 89 input values spanning -10 to 10 through Check<TypeParam> together
+// with the expected output value for each of them (second list below).
+TYPED_TEST(LogisticTest, Simple)
+{
+ Check<TypeParam>(
+ {89}, {89},
+ {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
+ -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
+ -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
+ -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
+ -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
+ -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
+ -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
+ -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818,
+ 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455,
+ 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091,
+ 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727,
+ 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364,
+ 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000,
+ 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636,
+ 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000},
+ {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
+ 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
+ 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
+ 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
+ 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
+ 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
+ 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
+ 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
+ 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
+ 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
+ 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
+ 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
+ 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
+ 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
+ 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
+}
+
+// Negative case: configure() is expected to throw when a FLOAT32 input is paired with a U8 output.
+TEST(LogisticTest, IvalidInputOutputType_NEG)
+{
+  std::unique_ptr<IMemoryManager> manager = std::make_unique<TestMemoryManager>();
+
+  std::vector<float> values{10};
+  Shape shape = {1};
+  Tensor float_input = makeInputTensor<DataType::FLOAT32>(shape, values, manager.get());
+  Tensor u8_output = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+  Logistic kernel(&float_input, &u8_output);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: configure() is expected to throw for a U8 output created with scale 1/255.
+TEST(LogisticTest, IvalidQuantParam_NEG)
+{
+  std::unique_ptr<IMemoryManager> manager = std::make_unique<TestMemoryManager>();
+
+  std::vector<float> values{-10, 10};
+  Shape shape = {2};
+  std::pair<float, int32_t> in_quant = quantizationParams<uint8_t>(-10, 10);
+
+  Tensor u8_input = makeInputTensor<DataType::U8>(shape, in_quant.first, in_quant.second, values,
+                                                  manager.get());
+  Tensor u8_output = makeOutputTensor(DataType::U8, 1. / 255, 0);
+
+  Logistic kernel(&u8_input, &u8_output);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+#ifndef DIS_FLOAT
+
+// Float max-pooling: fills tflite::PoolParams from the operator options and forwards to the
+// TFLite reference MaxPool.
+void evalFloat(const circle::Tensor *input, const circle::Tensor *output,
+               const circle::Pool2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  // Dimensions 1 and 2 of the input are used as height and width.
+  const int32_t in_h = Tensor::dim(input, 1);
+  const int32_t in_w = Tensor::dim(input, 2);
+
+  const auto padding_mode = luci_padding(options->padding());
+  const int32_t out_h =
+    kernels::computeOutputSize(padding_mode, in_h, options->filter_height(), options->stride_h());
+  const int32_t out_w =
+    kernels::computeOutputSize(padding_mode, in_w, options->filter_width(), options->stride_w());
+
+  const auto *src = runtime_graph->getDataByTensor(input);
+  auto *dst = runtime_graph->getDataByTensor(output);
+
+  // Clamp bounds derived from the fused activation function.
+  float act_min{};
+  float act_max{};
+  kernels::calculateActivationRange(luci_actfunc(options->fused_activation_function()), &act_min,
+                                    &act_max);
+
+  tflite::PoolParams pool_params{};
+  pool_params.padding_values.height =
+    kernels::computePadding(options->stride_h(), 1, in_h, options->filter_height(), out_h);
+  pool_params.padding_values.width =
+    kernels::computePadding(options->stride_w(), 1, in_w, options->filter_width(), out_w);
+  pool_params.stride_height = options->stride_h();
+  pool_params.stride_width = options->stride_w();
+  pool_params.filter_height = options->filter_height();
+  pool_params.filter_width = options->filter_width();
+  pool_params.float_activation_min = act_min;
+  pool_params.float_activation_max = act_max;
+
+  tflite::reference_ops::MaxPool(pool_params, kernels::getTensorShape(input),
+                                 kernels::getTensorData<float>(src),
+                                 kernels::getTensorShape(output),
+                                 kernels::getTensorData<float>(dst));
+}
+
+#endif // DIS_FLOAT
+
+#ifndef DIS_QUANT
+// uint8 max-pooling: fills tflite::PoolParams with quantized activation bounds and forwards to
+// the TFLite reference MaxPool.
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *output,
+                   const circle::Pool2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  // Clamp bounds derived from the fused activation and the output tensor.
+  int32_t act_min{};
+  int32_t act_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &act_min, &act_max);
+
+  // Dimensions 1 and 2 of the input are used as height and width.
+  const int32_t in_h = Tensor::dim(input, 1);
+  const int32_t in_w = Tensor::dim(input, 2);
+
+  const auto padding_mode = luci_padding(options->padding());
+  const int32_t out_h =
+    kernels::computeOutputSize(padding_mode, in_h, options->filter_height(), options->stride_h());
+  const int32_t out_w =
+    kernels::computeOutputSize(padding_mode, in_w, options->filter_width(), options->stride_w());
+
+  tflite::PoolParams pool_params{};
+  pool_params.padding_values.height =
+    kernels::computePadding(options->stride_h(), 1, in_h, options->filter_height(), out_h);
+  pool_params.padding_values.width =
+    kernels::computePadding(options->stride_w(), 1, in_w, options->filter_width(), out_w);
+  pool_params.stride_height = options->stride_h();
+  pool_params.stride_width = options->stride_w();
+  pool_params.filter_height = options->filter_height();
+  pool_params.filter_width = options->filter_width();
+  pool_params.quantized_activation_min = act_min;
+  pool_params.quantized_activation_max = act_max;
+
+  const auto *src = runtime_graph->getDataByTensor(input);
+  auto *dst = runtime_graph->getDataByTensor(output);
+
+  tflite::reference_ops::MaxPool(pool_params, kernels::getTensorShape(input),
+                                 kernels::getTensorData<uint8_t>(src),
+                                 kernels::getTensorShape(output),
+                                 kernels::getTensorData<uint8_t>(dst));
+}
+
+// int16 max-pooling: same parameter setup as the uint8 path, but dispatched to the TFLite
+// reference_integer_ops MaxPool.
+void evalSInt16(const circle::Tensor *input, const circle::Tensor *output,
+                const circle::Pool2DOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  // Clamp bounds derived from the fused activation and the output tensor.
+  int32_t act_min{};
+  int32_t act_max{};
+  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
+                                             output, &act_min, &act_max);
+
+  // Dimensions 1 and 2 of the input are used as height and width.
+  const int32_t in_h = Tensor::dim(input, 1);
+  const int32_t in_w = Tensor::dim(input, 2);
+
+  const auto padding_mode = luci_padding(options->padding());
+  const int32_t out_h =
+    kernels::computeOutputSize(padding_mode, in_h, options->filter_height(), options->stride_h());
+  const int32_t out_w =
+    kernels::computeOutputSize(padding_mode, in_w, options->filter_width(), options->stride_w());
+
+  tflite::PoolParams pool_params{};
+  pool_params.padding_values.height =
+    kernels::computePadding(options->stride_h(), 1, in_h, options->filter_height(), out_h);
+  pool_params.padding_values.width =
+    kernels::computePadding(options->stride_w(), 1, in_w, options->filter_width(), out_w);
+  pool_params.stride_height = options->stride_h();
+  pool_params.stride_width = options->stride_w();
+  pool_params.filter_height = options->filter_height();
+  pool_params.filter_width = options->filter_width();
+  pool_params.quantized_activation_min = act_min;
+  pool_params.quantized_activation_max = act_max;
+
+  const auto *src = runtime_graph->getDataByTensor(input);
+  auto *dst = runtime_graph->getDataByTensor(output);
+
+  tflite::reference_integer_ops::MaxPool(pool_params, kernels::getTensorShape(input),
+                                         kernels::getTensorData<int16_t>(src),
+                                         kernels::getTensorShape(output),
+                                         kernels::getTensorData<int16_t>(dst));
+}
+
+#endif // DIS_QUANT
+
+} // namespace
+
+// Validates a MaxPool2D operator: input and output must share an element type, the input must be
+// 4-D, and for quantized types the input/output quantization parameters must be compatible.
+void configure_kernel_CircleMaxPool2D(const circle::Operator *cur_op,
+                                      BaseRuntimeGraph *runtime_graph)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+  assert(Tensor::num_dims(input) == 4);
+
+#ifndef DIS_QUANT
+  const auto in_type = Tensor::element_type(input);
+  if (in_type == DataType::U8 || in_type == DataType::S16)
+  {
+    // Both quantized paths require matching input and output scales.
+    LUCI_INTERPRETER_CHECK(std::abs(Tensor::scale(output) - Tensor::scale(input)) <= 1.0e-6);
+    if (in_type == DataType::U8)
+    {
+      LUCI_INTERPRETER_CHECK(Tensor::zero_point(output) == Tensor::zero_point(input));
+    }
+    else
+    {
+      // S16 additionally requires both zero points to be 0.
+      LUCI_INTERPRETER_CHECK(Tensor::zero_point(input) == 0 && Tensor::zero_point(output) == 0);
+    }
+  }
+#endif // DIS_QUANT
+}
+
+// Runs MaxPool2D for the given operator by dispatching on the input element type. The trailing
+// bool parameter is unused.
+void execute_kernel_CircleMaxPool2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+                                    bool)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto src_tensor = runtime_graph->getCircleTensorByIndex(input_index);
+  auto dst_tensor = runtime_graph->getCircleTensorByIndex(output_index);
+
+  const auto *pool_options = cur_op->builtin_options_as_Pool2DOptions();
+
+  switch (Tensor::element_type(src_tensor))
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      evalFloat(src_tensor, dst_tensor, pool_options, runtime_graph);
+      return;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::U8:
+      evalQuantized(src_tensor, dst_tensor, pool_options, runtime_graph);
+      return;
+    case DataType::S16:
+      evalSInt16(src_tensor, dst_tensor, pool_options, runtime_graph);
+      return;
+#endif // DIS_QUANT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// TODO enable it
+#if 0
+#include "kernels/MaxPool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture for the MaxPool2D tests; creates a fresh TestMemoryManager before every test.
+class MaxPool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ // Memory manager handed to makeInputTensor and used to allocate output tensors.
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Float case: 1x3x5x1 input, 2x3 filter, stride 1x2, VALID padding, RELU6 activation.
+// The window maxima are 1, 2, 5 and 7; the expected output has the 7 clamped to 6.
+TEST_F(MaxPool2DTest, Float)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ // The output buffer is allocated after configure() and before execute().
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 1, 2, //
+ 5, 6, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Uint8 case: 1x2x4x1 input, 2x2 filter, stride 2x2, VALID padding, RELU6 activation.
+// Input and output share the same quantization parameters. The window maxima are 0 and 12;
+// the expected output has the 12 clamped to 6.
+TEST_F(MaxPool2DTest, Uint8)
+{
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ // The output buffer is allocated after configure() and before execute().
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.0, 6.0};
+ std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// SInt16 case: same data and pooling parameters as the Float test, with input and output
+// created as S16 tensors using scale 0.2 and zero point 0.
+TEST_F(MaxPool2DTest, SInt16)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
+ };
+ std::vector<float> ref_output_data{
+ 1, 2, //
+ 5, 6, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ // The output buffer is allocated after configure() and before execute().
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers input1 and input2 as the kernel's inputs and output as its only output.
+Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+// Checks that both inputs and the output share one element type, then resizes the output to the
+// broadcast of the two input shapes.
+void Maximum::configure()
+{
+  // Each check is terminated with ';' like every other LUCI_INTERPRETER_CHECK use, so the
+  // statements do not depend on how the macro happens to expand.
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+// Dispatches to the typed implementation selected by the element type of input1.
+void Maximum::execute() const
+{
+  const auto type = input1()->element_type();
+  if (type == DataType::FLOAT32)
+  {
+    evalMaximum<float>();
+  }
+  else if (type == DataType::U8)
+  {
+    evalMaximum<uint8_t>();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Applies std::max to corresponding elements of the two inputs by delegating to
+// BinaryOpBroadcastSlow.
+template <typename T> inline void Maximum::evalMaximum() const
+{
+  const auto pick_larger = [](T x, T y) { return std::max(x, y); };
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+                        getTensorShape(input2()), getTensorData<T>(input2()),
+                        getTensorShape(output()), getTensorData<T>(output()), pick_larger);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Copies the reduction axes into `params`.
+//
+// axes_data: axis indices read from the axes tensor (num_axes entries).
+// num_axes:  number of entries; must not exceed 4, the number of axis slots filled here.
+// params:    receives axis_count and the axis array; slots past num_axes are set to 1.
+static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
+{
+  // Writing more than 4 entries would run past the axis slots handled below.
+  assert(num_axes <= 4);
+  params->axis_count = num_axes;
+  for (int i = 0; i < num_axes; ++i)
+  {
+    // Use the standard fixed-width type instead of the non-standard `int16` alias.
+    params->axis[i] = static_cast<int16_t>(axes_data[i]);
+  }
+  for (int i = num_axes; i < 4; ++i)
+  {
+    params->axis[i] = 1;
+  }
+}
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+// Counts the distinct axes that will be reduced. Negative axes are wrapped by input_num_dims
+// before comparison, so an axis given twice (in either form) is counted once.
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+  const auto wrap = [input_num_dims](int32_t axis) -> int {
+    return axis >= 0 ? axis : axis + input_num_dims;
+  };
+
+  int duplicates = 0;
+  for (int i = 0; i < num_axes; ++i)
+  {
+    const int current = wrap(axes_data[i]);
+    assert(current >= 0 && current < input_num_dims);
+
+    // Look for the same axis among the entries that precede position i.
+    bool seen_before = false;
+    for (int j = 0; j < i && !seen_before; ++j)
+    {
+      seen_before = (wrap(axes_data[j]) == current);
+    }
+    if (seen_before)
+    {
+      ++duplicates;
+    }
+  }
+  return num_axes - duplicates;
+}
+
+// Computes the shape produced by reducing `input_shape` over the given axes.
+//
+// With keep_dims the rank is unchanged and every reduced dimension becomes 1; otherwise the
+// reduced dimensions are dropped. A rank-0 input yields Shape(0).
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+                            bool keep_dims)
+{
+  const int input_num_dims = input_shape.num_dims();
+  if (input_num_dims == 0)
+  {
+    return Shape(0);
+  }
+
+  // True when dimension `idx` is named by some axis, either directly or as a negative index.
+  const auto is_reduced = [&](int idx) {
+    for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+    {
+      if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+      {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  if (keep_dims)
+  {
+    Shape output_shape(input_num_dims);
+    for (int idx = 0; idx < input_num_dims; ++idx)
+    {
+      if (is_reduced(idx))
+      {
+        output_shape.dim(idx) = 1;
+      }
+      else
+      {
+        output_shape.dim(idx) = input_shape.dim(idx);
+      }
+    }
+    return output_shape;
+  }
+
+  const int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+  Shape output_shape(input_num_dims - num_reduce_axes);
+  // Next output slot to fill; equals idx minus the number of reduced dimensions seen so far.
+  int out_idx = 0;
+  for (int idx = 0; idx < input_num_dims; ++idx)
+  {
+    if (!is_reduced(idx))
+    {
+      output_shape.dim(out_idx) = input_shape.dim(idx);
+      ++out_idx;
+    }
+  }
+  return output_shape;
+}
+
+// Builds the Mean kernel. `input` and `axes` are registered as inputs; `output` and the three
+// scratch tensors (`temp_index`, `resolved_axes`, `temp_sum`) are registered as outputs, in that
+// order. `params` is forwarded to KernelWithParams.
+Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+           Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
+  : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
+                                    params)
+{
+}
+
+// Validates element types, resizes the output to the reduced shape, and decides whether the
+// scratch tensors are needed. They are skipped only for the keep_dims 4-D mean over axes 1 and 2.
+void Mean::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+  if (input()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+  }
+
+  const Shape &input_shape = input()->shape();
+  int input_num_dims = input_shape.num_dims();
+
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+  assert(num_axes <= 4);
+  // TODO: enable it only if kernel with dynamic shapes
+  Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+  output()->resize(output_shape);
+
+  tflite::MeanParams params{};
+  resolveAxes(axes_data, num_axes, &params);
+
+  // Same condition the eval functions use to pick the specialized 4-D implementation.
+  const bool is_4d_mean_over_hw =
+    _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+    ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1));
+  _need_temporaries = !is_4d_mean_over_hw;
+
+  // Output slots 1..3 hold the scratch tensors registered by the constructor.
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+  auto temp_sum = getOutputTensors()[3];
+
+  if (_need_temporaries)
+  {
+    temp_index->resize(Shape(input_num_dims));
+    resolved_axes->resize(Shape(num_axes));
+    temp_sum->resize(output()->shape());
+  }
+  else
+  {
+    // The specialized path never touches the scratch tensors, so mark them non-allocatable.
+    temp_index->set_allocatable(false);
+    resolved_axes->set_allocatable(false);
+    temp_sum->set_allocatable(false);
+  }
+}
+
+// Dispatches to the implementation matching the input element type.
+void Mean::execute() const
+{
+  const auto type = input()->element_type();
+  if (type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else if (type == DataType::S16)
+  {
+    evalQuantizedS16();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float mean. Uses the specialized TFLite reference Mean for a keep_dims 4-D reduction over
+// axes 1 and 2, and the generic reference Mean (which needs the scratch tensors) otherwise.
+void Mean::evalFloat() const
+{
+  const Shape &input_shape = input()->shape();
+  int input_num_dims = input_shape.num_dims();
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+
+  tflite::MeanParams params{};
+  resolveAxes(axes_data, num_axes, &params);
+
+  // Output slots 1..3 hold the scratch tensors registered by the constructor.
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+  auto temp_sum = getOutputTensors()[3];
+
+  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+      ((params.axis[0] == 1 && params.axis[1] == 2) ||
+       (params.axis[0] == 2 && params.axis[1] == 1)))
+  {
+    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
+                                getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+                                input()->shape().num_dims(), getTensorData<float>(output()),
+                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+                                axes_data, num_axes, _params.keep_dims,
+                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+                                getTensorData<float>(temp_sum));
+  }
+}
+
+// uint8 mean. Three paths, in order:
+//  1. keep_dims 4-D reduction over axes 1 and 2: specialized reference Mean with requantization;
+//  2. input and output share zero point and scale: generic reference Mean on raw values;
+//  3. otherwise: QuantizedMeanOrSum with compute_sum = false.
+void Mean::evalQuantized() const
+{
+  const Shape &input_shape = input()->shape();
+  int input_num_dims = input_shape.num_dims();
+  const auto *axes_data = getTensorData<int32_t>(axes());
+  int num_axes = axes()->shape().num_elements();
+
+  tflite::MeanParams params{};
+  resolveAxes(axes_data, num_axes, &params);
+
+  // Output slots 1..3 hold the scratch tensors registered by the constructor.
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+  auto temp_sum = getOutputTensors()[3];
+
+  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+      ((params.axis[0] == 1 && params.axis[1] == 2) ||
+       (params.axis[0] == 2 && params.axis[1] == 1)))
+  {
+    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                                input()->zero_point(), input()->scale(), getTensorShape(output()),
+                                getTensorData<uint8_t>(output()), output()->zero_point(),
+                                output()->scale());
+  }
+  else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
+  {
+    tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
+                                input()->shape().num_dims(), getTensorData<uint8_t>(output()),
+                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+                                axes_data, num_axes, _params.keep_dims,
+                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+                                getTensorData<int>(temp_sum));
+  }
+  else
+  {
+    tflite::reference_ops::QuantizedMeanOrSum<>(
+      getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
+      getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+      getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
+      getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+      _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+      getTensorData<int>(temp_sum),
+      /*compute_sum=*/false);
+  }
+}
+
+// int16 mean. Only the keep_dims 4-D reduction over axes 1 and 2 is implemented; any other
+// configuration hits the assert in the else branch.
+void Mean::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ const int num_axes = axes()->shape().num_elements();
+
+ // Symmetric clamp range: the lower bound is -max, not the int16_t minimum.
+ constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
+ constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+ // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
+ ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
+ {
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+ assert(output_shape.num_dims() == 4);
+ assert(output_shape.dim(0) == batches);
+ assert(output_shape.dim(1) == 1);
+ assert(output_shape.dim(2) == 1);
+ assert(output_shape.dim(3) == depth);
+
+ // Rescaling factor from the input scale to the output scale.
+ const double real_multiplier =
+ static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
+
+ int32_t output_multiplier{};
+ int output_shift{};
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ const int32_t num_elements_in_axes = input_height * input_width;
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t c = 0; c < depth; ++c)
+ {
+ // Sum every spatial position of this (batch, channel) pair in a 32-bit accumulator.
+ int32_t acc = 0;
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
+ }
+ }
+ // The sum is rescaled first and divided by the element count afterwards.
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ // Divide by the number of elements rounding to the nearest integer.
+ scaled_acc = scaled_acc > 0
+ ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
+ : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
+
+ scaled_acc = std::max(scaled_acc, output_min);
+ scaled_acc = std::min(scaled_acc, output_max);
+
+ output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
+ }
+ }
+ }
+ else
+ {
+ assert(false && "Unsupported configuration.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers input1 and input2 as the kernel's inputs and output as its only output.
+Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+// Checks that both inputs and the output share one element type, then resizes the output to the
+// broadcast of the two input shapes.
+void Minimum::configure()
+{
+  // Each check is terminated with ';' like every other LUCI_INTERPRETER_CHECK use, so the
+  // statements do not depend on how the macro happens to expand.
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+// Dispatches to the typed implementation selected by the element type of input1.
+void Minimum::execute() const
+{
+  const auto type = input1()->element_type();
+  if (type == DataType::FLOAT32)
+  {
+    evalMinimum<float>();
+  }
+  else if (type == DataType::U8)
+  {
+    evalMinimum<uint8_t>();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Applies std::min to corresponding elements of the two inputs by delegating to
+// BinaryOpBroadcastSlow.
+template <typename T> inline void Minimum::evalMinimum() const
+{
+  const auto pick_smaller = [](T x, T y) { return std::min(x, y); };
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+                        getTensorShape(input2()), getTensorData<T>(input2()),
+                        getTensorShape(output()), getTensorData<T>(output()), pick_smaller);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+
+#include "kernels/Utils.h"
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Builds the MirrorPad kernel with `input` and `paddings` as inputs and `output` as the only
+// output; `params` is forwarded to KernelWithParams.
+MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+                     const MirrorPadParams &params)
+  : KernelWithParams<MirrorPadParams>({input, paddings}, {output}, params)
+{
+}
+
+// Validates the operands (via asserts) and resizes the output so that every dimension equals
+// the input dimension plus its leading and trailing padding.
+void MirrorPad::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+
+  if (num_dims > 4)
+    assert(false && "Unsupported number of dimensions.");
+
+  assert(output()->element_type() == input()->element_type());
+  assert(paddings()->element_type() == DataType::S32);
+  // Paddings shape should be [N, 2].
+  assert(paddings()->shape().num_dims() == 2);
+  assert(paddings()->shape().dim(0) == num_dims);
+  assert(paddings()->shape().dim(1) == 2);
+
+  Shape output_shape(num_dims);
+  // Walk the paddings tensor one (before, after) pair per dimension.
+  const int32_t *pair = getTensorData<int32_t>(paddings());
+  for (int i = 0; i < num_dims; ++i, pair += 2)
+  {
+    const int32_t padding_before = pair[0];
+    const int32_t padding_after = pair[1];
+    assert(padding_before >= 0 && padding_after >= 0);
+    output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+  }
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(output_shape);
+}
+
+// Forward declaration: the definition follows MirrorPad::execute(), which calls it.
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output);
+
+// Dispatches to MirrorPadImpl<T> according to the input element type.
+// FLOAT32 and U8 are handled; any other type trips a debug assertion.
+void MirrorPad::execute() const
+{
+  const auto element_type = input()->element_type();
+
+  if (element_type == DataType::FLOAT32)
+  {
+    MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
+    return;
+  }
+
+  if (element_type == DataType::U8)
+  {
+    // The output zero point has to be representable as uint8_t.
+    assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+    assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+
+    MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
+    return;
+  }
+
+  assert(false && "Unsupported type.");
+}
+
+// Fills `output` with `input` surrounded by mirrored copies of its own border.
+//
+// The tensors are viewed as (up to) 4-D [b, h, w, d] with missing leading axes treated as
+// size 1 / padding 0. `paddings` holds one (before, after) int32 pair per input axis.
+//
+//  - MirrorPadMode::REFLECT : mirror around the edge element, the edge itself is not repeated
+//                             ([1 2 3] padded by 1 -> [2 1 2 3 2]).
+//  - otherwise (SYMMETRIC)  : mirror around the edge, the edge element is repeated
+//                             ([1 2 3] padded by 1 -> [1 1 2 3 3]).
+//
+// Fixes over the previous revision:
+//  - padding values of axes that do not exist (rank < 4) were read at negative indices;
+//  - REFLECT used a plain modulo, i.e. circular wrap-around, instead of a reflection.
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+                          Tensor &output)
+{
+  auto const input_dims = input.shape().num_dims();
+  auto const input_data = input.data<T>();
+  auto const paddings_data = paddings.data<int32_t>();
+  auto const output_data = output.data<T>();
+
+  // Extent of the axis that is `from_end` positions from the back; absent axes count as 1.
+  const auto extent = [input_dims](const Tensor &tensor, int from_end) -> int32_t {
+    return input_dims >= from_end ? tensor.shape().dim(input_dims - from_end) : 1;
+  };
+  // Padding of that axis (side 0 = before, side 1 = after); absent axes have no padding.
+  const auto pad = [input_dims, paddings_data](int from_end, int side) -> int32_t {
+    return input_dims >= from_end ? paddings_data[2 * (input_dims - from_end) + side] : 0;
+  };
+
+  auto const input_b = extent(input, 4);
+  auto const input_h = extent(input, 3);
+  auto const input_w = extent(input, 2);
+  auto const input_d = extent(input, 1);
+
+  auto const input_h_offset = input_d * input_w;
+  auto const input_b_offset = input_h_offset * input_h;
+
+  auto const output_b = extent(output, 4);
+  auto const output_h = extent(output, 3);
+  auto const output_w = extent(output, 2);
+  auto const output_d = extent(output, 1);
+
+  auto const left_b_pad = pad(4, 0);
+  auto const left_h_pad = pad(3, 0);
+  auto const left_w_pad = pad(2, 0);
+  auto const left_d_pad = pad(1, 0);
+
+  auto const right_b_pad = pad(4, 1);
+  auto const right_h_pad = pad(3, 1);
+  auto const right_w_pad = pad(2, 1);
+  auto const right_d_pad = pad(1, 1);
+
+  // Mathematical (always non-negative) remainder.
+  const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
+  // Flat offset into the input buffer for a [b, h, w, d] coordinate.
+  const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
+                                                                      auto b) {
+    return d + w * input_d + h * input_h_offset + b * input_b_offset;
+  };
+
+  // Maps output coordinate `i` to the input coordinate for SYMMETRIC padding.
+  const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
+    bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
+    return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
+  };
+
+  // Maps output coordinate `i` to the input coordinate for REFLECT padding: the signal
+  // is periodic with period 2 * (size - 1) and mirrored inside each period.
+  const auto reflect_dim = [&positive_mod](int32_t i, int32_t left_pad, int32_t size) -> int32_t {
+    const int32_t period = 2 * (size - 1);
+    if (period <= 0)
+      return 0; // single-element axis: there is only one element to pick
+    const int32_t pos = positive_mod(i - left_pad, period);
+    return pos < size ? pos : period - pos;
+  };
+
+  const T *in_ptr = input_data;
+  T *out_ptr = output_data;
+
+  for (int32_t b = 0; b < output_b; ++b)
+  {
+    for (int32_t h = 0; h < output_h; ++h)
+    {
+      for (int32_t w = 0; w < output_w; ++w)
+      {
+        for (int32_t d = 0; d < output_d; ++d)
+        {
+          const bool in_padding = b < left_b_pad || b >= output_b - right_b_pad || //
+                                  h < left_h_pad || h >= output_h - right_h_pad || //
+                                  w < left_w_pad || w >= output_w - right_w_pad || //
+                                  d < left_d_pad || d >= output_d - right_d_pad;
+          if (!in_padding)
+          {
+            // Interior elements are visited in the same row-major order as the input.
+            *out_ptr++ = *in_ptr++;
+          }
+          else if (mode == MirrorPadMode::REFLECT)
+          {
+            *out_ptr++ = input_data[offset_index(
+              reflect_dim(d, left_d_pad, input_d), reflect_dim(w, left_w_pad, input_w),
+              reflect_dim(h, left_h_pad, input_h), reflect_dim(b, left_b_pad, input_b))];
+          }
+          else
+          {
+            *out_ptr++ = input_data[offset_index(
+              symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
+              symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
+          }
+        }
+      }
+    }
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mul.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include "PALMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers {input1, input2} as kernel inputs and {output} as the kernel output;
+// `params` is forwarded to KernelWithParams.
+Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
+  : KernelWithParams<MulParams>({input1, input2}, {output}, params)
+{
+}
+
+// Checks that both inputs and the output share one element type, applies the extra
+// quantization checks for S16 (single zero point per input, all zero points equal to 0)
+// and resizes the output to the broadcast shape of the two inputs.
+void Mul::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
+  if (input1()->element_type() == DataType::S16)
+  {
+    // The terminating ';' was missing here; it is required as soon as the macro does not
+    // expand to a self-terminated statement.
+    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+                           input2()->zero_points().size() == 1);
+    LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+                           output()->zero_point() == 0);
+  }
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+// Selects the evaluation routine that matches the element type of input1.
+// Types other than FLOAT32 / S64 / S32 / S16 trip a debug assertion.
+void Mul::execute() const
+{
+  const auto element_type = input1()->element_type();
+
+  if (element_type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (element_type == DataType::S64)
+  {
+    evalInteger<int64_t>();
+  }
+  else if (element_type == DataType::S32)
+  {
+    evalInteger<int32_t>();
+  }
+  else if (element_type == DataType::S16)
+  {
+    evalQuantizedS16();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float multiplication. The activation range is taken from the kernel parameters;
+// ProcessBroadcastShapes fills the broadcast description in `params` and tells whether
+// the broadcasting implementation has to be used.
+void Mul::evalFloat() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<float>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    luci_interpreter_pal::BroadcastMul4DSlow(
+      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+                              getTensorShape(input2()), getTensorData<float>(input2()),
+                              getTensorShape(output()), getTensorData<float>(output()));
+  }
+}
+
+// Integer multiplication for element type T; same structure as evalFloat(): fill the
+// activation range, let ProcessBroadcastShapes decide whether broadcasting is needed,
+// then call the matching PAL routine.
+template <typename T> void Mul::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    luci_interpreter_pal::BroadcastMul4DSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                              getTensorShape(input2()), getTensorData<T>(input2()),
+                              getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
+// S16 quantized multiplication: the raw int32 product of the two operands is rescaled by
+// (input1_scale * input2_scale / output_scale), clamped to the activation range and
+// narrowed to int16.
+void Mul::evalQuantizedS16() const
+{
+  const double lhs_scale = static_cast<double>(input1()->scale());
+  const double rhs_scale = static_cast<double>(input2()->scale());
+  const double out_scale = static_cast<double>(output()->scale());
+  const double real_multiplier = lhs_scale * rhs_scale / out_scale;
+
+  // Fixed-point representation of the real multiplier.
+  int32_t output_multiplier;
+  int output_shift;
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  auto multiply_and_requantize = [=](int16_t lhs, int16_t rhs) {
+    const int32_t raw_product = static_cast<int32_t>(lhs) * static_cast<int32_t>(rhs);
+    const int32_t rescaled =
+      tflite::MultiplyByQuantizedMultiplier(raw_product, output_multiplier, output_shift);
+    const int32_t clamped = std::min(std::max(rescaled, activation_min), activation_max);
+    return static_cast<int16_t>(clamped);
+  };
+
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+                        getTensorShape(input2()), getTensorData<int16_t>(input2()),
+                        getTensorShape(output()), getTensorData<int16_t>(output()),
+                        multiply_and_requantize);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/Utils.h"
+
+#include "PALNeg.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `input` as the only kernel input and `output` as the only kernel output.
+Neg::Neg(const Tensor *input, Tensor *output)
+  : Kernel({input}, {output})
+{
+}
+
+// Requires matching input/output element types and gives the output the input's shape.
+void Neg::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto &same_as_input = input()->shape();
+  output()->resize(same_as_input);
+}
+
+// Runs the float implementation; every other element type trips a debug assertion.
+void Neg::execute() const
+{
+  if (input()->element_type() == DataType::FLOAT32)
+  {
+    evalFloat();
+    return;
+  }
+
+  assert(false && "Unsupported type.");
+}
+
+// Hands the float input and output buffers to the PAL Negate routine.
+void Neg::evalFloat() const
+{
+  const auto *src = getTensorData<float>(input());
+  auto *dst = getTensorData<float>(output());
+
+  luci_interpreter_pal::Negate(getTensorShape(input()), src, getTensorShape(output()), dst);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/NotEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers {x, y} as kernel inputs and {output} as the kernel output.
+NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+// Checks operand/output types, pre-computes the per-operand quantized multipliers for U8
+// inputs and resizes the output to the broadcast shape of x and y.
+void NotEqual::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  const bool is_u8 = x()->element_type() == DataType::U8;
+  if (is_u8)
+  {
+    // Multiplier/shift pairs are cached in members and consumed by evalQuantized().
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(x()->shape(), y()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Picks the comparison routine matching the element type of x.
+// Types other than FLOAT32 / S64 / S32 / U8 trip a debug assertion.
+void NotEqual::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::U8:
+      evalQuantized();
+      return;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      return;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      return;
+    case DataType::FLOAT32:
+      evalFloat();
+      return;
+    default:
+      break;
+  }
+
+  assert(false && "Unsupported type.");
+}
+
+// Float "not equal" comparison. When the operand shapes differ the 4-D broadcasting
+// reference implementation is used, otherwise the plain element-wise one.
+void NotEqual::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  // Value-initialize so that the quantization fields, which are not set on this path,
+  // do not carry indeterminate values (matches how other kernels create tflite params).
+  tflite::ComparisonParams op_params{};
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), x_data,
+                                                   getTensorShape(y()), y_data,
+                                                   getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                    y_data, getTensorShape(output()), output_data);
+  }
+}
+
+// Integer "not equal" comparison for element type T (no rescaling of the operands).
+// When the operand shapes differ the 4-D broadcasting reference implementation is used.
+template <typename T> void NotEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  // Value-initialize so that the quantization fields, which are not set on this path,
+  // do not carry indeterminate values (matches how other kernels create tflite params).
+  tflite::ComparisonParams op_params{};
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                            getTensorShape(y()), y_data,
+                                                            getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                             getTensorShape(y()), y_data, getTensorShape(output()),
+                                             output_data);
+  }
+}
+
+// U8 "not equal" comparison with rescaling: offsets are the negated zero points and the
+// multiplier/shift pairs are the ones cached by configure().
+void NotEqual::evalQuantized() const
+{
+  const auto lhs = getTensorData<uint8_t>(x());
+  const auto rhs = getTensorData<uint8_t>(y());
+  auto result = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+
+  // Left operand.
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+
+  // Right operand.
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (!op_params.is_broadcast)
+  {
+    tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), lhs,
+                                               getTensorShape(y()), rhs, getTensorShape(output()),
+                                               result);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling(op_params, getTensorShape(x()), lhs,
+                                                            getTensorShape(y()), rhs,
+                                                            getTensorShape(output()), result);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+// Writes the one-hot expansion of `indices_tensor` into `output_tensor`.
+//
+// indices_tensor   : S32 indices (read as int32_t)
+// on_value_tensor  : single-element tensor, value written where index == position
+// off_value_tensor : single-element tensor, value written everywhere else
+// depth            : size of the newly inserted axis
+// axis             : position of the new axis; -1 means "append as last axis"
+//
+// An indices tensor with a zero-sized dimension before `axis` produces no output elements;
+// this case returns early instead of dividing by zero.
+template <typename T>
+void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
+                       const Tensor *off_value_tensor, int32_t depth, int32_t axis,
+                       Tensor *output_tensor)
+{
+  // define input shape and correct axis
+  auto const &input_shape = indices_tensor->shape();
+  axis = axis == -1 ? input_shape.num_dims() : axis;
+
+  // TODO support other integer input types
+  auto const *indices = getTensorData<int32_t>(indices_tensor);
+  auto const on_value = getTensorData<T>(on_value_tensor)[0];
+  auto const off_value = getTensorData<T>(off_value_tensor)[0];
+  auto *output = getTensorData<T>(output_tensor);
+
+  // prefix_dim_size == # of elements before the axis
+  // depth == # of elements per axis
+  // suffix_dim_size == # of elements after the axis
+  auto prefix_dim_size = 1;
+  for (int32_t i = 0; i < axis; ++i)
+  {
+    prefix_dim_size *= input_shape.dim(i);
+  }
+  if (prefix_dim_size == 0)
+  {
+    // Empty indices: nothing to write, and the division below would be by zero.
+    return;
+  }
+  assert(prefix_dim_size > 0);
+  auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
+
+  // View the indices as a matrix of size:
+  //     prefix_dim_size x suffix_dim_size
+  // View the output as a matrix of size:
+  //     prefix_dim_size x depth x suffix_dim_size
+  // Then the output is:
+  //     output(i, j, k) == (indices(i, k) == j) ? on : off
+  for (int32_t i = 0; i < prefix_dim_size; ++i)
+    for (int32_t j = 0; j < depth; ++j)
+      for (int32_t k = 0; k < suffix_dim_size; ++k, ++output)
+        *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value;
+}
+
+} // namespace
+
+// Registers {indices, depth, on_value, off_value} as kernel inputs and {output} as the
+// kernel output; `params` is forwarded to KernelWithParams.
+OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+               const Tensor *off_value, Tensor *output, const OneHotParams &params)
+  : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
+{
+  // Do nothing
+}
+
+// Validates tensor types and sizes, then resizes the output to the indices shape with one
+// extra axis of size `depth` inserted at the (normalized) axis position.
+void OneHot::configure()
+{
+  // check types
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
+
+  // check shape dependent parameters
+  LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
+
+  // parameters that determine the output shape
+  auto const &indices_shape = indices()->shape();
+  auto const indices_rank = indices_shape.num_dims();
+  auto const new_axis_size = getTensorData<int32_t>(depth())[0];
+  // axis == -1 selects the position right after the last indices axis
+  auto const axis = params().axis == -1 ? indices_rank : params().axis;
+
+  // build the output shape axis by axis
+  Shape result_shape(indices_shape.num_dims() + 1);
+  for (int32_t d = 0; d < result_shape.num_dims(); ++d)
+  {
+    if (d < axis)
+    {
+      result_shape.dim(d) = indices_shape.dim(d);
+    }
+    else if (d == axis)
+    {
+      result_shape.dim(d) = new_axis_size;
+    }
+    else
+    {
+      // axes behind the inserted one are shifted by one position
+      result_shape.dim(d) = indices_shape.dim(d - 1);
+    }
+  }
+  // TODO: enable it only if kernel with dynamic shapes
+  // reshape output
+  output()->resize(result_shape);
+}
+
+// Reads depth and axis, then runs OneHotComputeImpl<T> for the output element type.
+// FLOAT32, U8 and S16 outputs are handled; other types trip a debug assertion.
+void OneHot::execute() const
+{
+  auto const axis = params().axis;
+  auto const depth_value = getTensorData<int32_t>(depth())[0];
+  auto const output_type = output()->element_type();
+
+  if (output_type == DataType::FLOAT32)
+  {
+    OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output());
+  }
+  else if (output_type == DataType::U8)
+  {
+    OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+  }
+  else if (output_type == DataType::S16)
+  {
+    OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+  }
+  else
+  {
+    // TODO Support other data types
+    assert(false && "Not supported, yet!");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <tensorflow/lite/kernels/internal/reference/prelu.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers {input, alpha} as kernel inputs and {output} as the kernel output.
+PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output)
+  : Kernel({input, alpha}, {output})
+{
+  // Nothing else to set up here.
+}
+
+// Defined out of line (with an empty body) so that the vector holding the quantized
+// alpha data is destroyed properly.
+PRelu::~PRelu()
+{
+}
+
+// Validates types and quantization parameters, pre-computes the fixed-point multipliers
+// for the quantized paths (U8: one alpha multiplier; S16: one alpha multiplier per
+// channel) and resizes the output to the broadcast shape of input and alpha.
+void PRelu::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1);
+  LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1);
+
+  switch (input()->element_type())
+  {
+    case DataType::U8:
+    {
+      LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives
+      _alpha_multipliers.resize(1);
+
+      // Multiplier applied to negative inputs (input * alpha).
+      double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
+      auto &layer_wise = _alpha_multipliers[0];
+      quantizeMultiplier(alpha_multiplier, &layer_wise.multiplier, &layer_wise.shift);
+
+      // Multiplier applied to non-negative inputs (identity path).
+      double identity_multiplier = input()->scale() / output()->scale();
+      quantizeMultiplier(identity_multiplier, &_output_multiplier_identity,
+                         &_output_shift_identity);
+      break;
+    }
+    case DataType::S16:
+    {
+      // Common check for correctness of quant params
+      LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+      for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel)
+      {
+        LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0);
+      }
+      // PRelu specific checks for CWQ
+      LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1);
+      LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) ==
+                             alpha()->shape().dim(alpha()->quantized_dimension()));
+      LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() ==
+                             input()->shape().dim(input()->shape().num_dims() - 1));
+
+      // all dimension of alpha except last one should be size 1
+      for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim)
+      {
+        LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1);
+      }
+
+      // One real multiplier per alpha channel, converted to fixed point.
+      std::vector<double> per_channel_multipliers =
+        getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale());
+      _alpha_multipliers = quantizeMultipliers(per_channel_multipliers);
+
+      // Multiplier applied to non-negative inputs (identity path).
+      double identity_multiplier = input()->scale() / output()->scale();
+      quantizeMultiplier(identity_multiplier, &_output_multiplier_identity,
+                         &_output_shift_identity);
+      break;
+    }
+    default:
+      // No quantization parameters to prepare for the remaining types.
+      break;
+  }
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape()));
+}
+
+// Selects the evaluation routine that matches the input element type.
+// Types other than FLOAT32 / U8 / S16 trip a debug assertion.
+void PRelu::execute() const
+{
+  const auto element_type = input()->element_type();
+
+  if (element_type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (element_type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else if (element_type == DataType::S16)
+  {
+    evalQuantizedS16();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float PRelu: out = in when in >= 0, otherwise in * alpha. Differing input/alpha shapes
+// go through the 4-D broadcasting reference helper, equal shapes through a flat loop.
+void PRelu::evalFloat() const
+{
+  const auto src = getTensorData<float>(input());
+  const auto slope = getTensorData<float>(alpha());
+  auto dst = getTensorData<float>(output());
+
+  auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? input : input * alpha; };
+
+  const bool shapes_differ = input()->shape() != alpha()->shape();
+  if (shapes_differ)
+  {
+    tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+      getTensorShape(input()), src, getTensorShape(alpha()), slope, getTensorShape(output()), dst,
+      PReluFunc);
+    return;
+  }
+
+  // Same shape: alpha is applied element by element.
+  const auto count = getTensorShape(input()).FlatSize();
+  for (auto idx = decltype(count){0}; idx < count; ++idx)
+  {
+    dst[idx] = PReluFunc(src[idx], slope[idx]);
+  }
+}
+
+// U8 PRelu through the TFLite reference kernels. Multiplier/shift set 1 is the identity
+// path and set 2 the alpha path, both prepared by configure().
+void PRelu::evalQuantized() const
+{
+  tflite::PreluParams op_params{};
+
+  // Zero points: inputs are negated, the output one is used as is.
+  op_params.input_offset = -input()->zero_point(); // Note the '-'.
+  op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'.
+  op_params.output_offset = output()->zero_point();
+
+  op_params.output_multiplier_1 = _output_multiplier_identity;
+  op_params.output_shift_1 = _output_shift_identity;
+  op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier;
+  op_params.output_shift_2 = _alpha_multipliers[0].shift;
+
+  const bool shapes_differ = input()->shape() != alpha()->shape();
+  if (!shapes_differ)
+  {
+    tflite::reference_ops::Prelu<uint8_t>(
+      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+      getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+    return;
+  }
+
+  tflite::reference_ops::BroadcastPrelu4DSlow(
+    op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+    getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+// Evaluates PRelu for one S16 element.
+// Non-negative inputs are rescaled with `identity_mult`; negative inputs are multiplied by
+// `alpha_val` and rescaled with `alpha_mult`. The result is clamped to the int16 range.
+static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val,
+                                       const ChannelQuantMultipliers &identity_mult,
+                                       const ChannelQuantMultipliers &alpha_mult)
+{
+  constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min();
+  constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();
+
+  int32_t rescaled;
+  if (input_val >= 0)
+  {
+    rescaled = tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val),
+                                                     identity_mult.multiplier, identity_mult.shift);
+  }
+  else
+  {
+    rescaled = tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val),
+                                                     alpha_mult.multiplier, alpha_mult.shift);
+  }
+
+  const int32_t lower_bounded = std::max(quantized_min, rescaled);
+  return std::min(quantized_max, lower_bounded);
+}
+
+// S16 PRelu with channel-wise quantized alpha: the input is walked as
+// [outer dimensions] x [last dimension], and each position of the last dimension uses its
+// own alpha value and alpha multiplier.
+void PRelu::evalQuantizedS16() const
+{
+  // Note that this kernel assumes alpha is CWQ
+  tflite::RuntimeShape in_shape = getTensorShape(input());
+  const int16_t *src = input()->data<int16_t>();
+  const int16_t *slopes = alpha()->data<int16_t>();
+  int16_t *dst = output()->data<int16_t>();
+
+  // Multiplier used for non-negative inputs.
+  const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity};
+
+  const int last_dim = input()->shape().num_dims() - 1;
+  const int32_t quant_dim_size = in_shape.Dims(last_dim);
+
+  // Product of every dimension except the last one.
+  int32_t outer_dims_size = 1;
+  for (int axis = 0; axis < last_dim; ++axis)
+  {
+    outer_dims_size *= in_shape.Dims(axis);
+  }
+
+  for (int32_t outer = 0; outer < outer_dims_size; ++outer)
+  {
+    const size_t row_start = static_cast<size_t>(outer) * static_cast<size_t>(quant_dim_size);
+    for (int32_t channel = 0; channel < quant_dim_size; ++channel)
+    {
+      const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[channel];
+      const size_t flat = row_start + channel;
+
+      dst[flat] = evalElemS16PRelu(src[flat], slopes[channel], pos_mult, neg_mult);
+    }
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Takes ownership of the input tensor list and registers {output} as the kernel output;
+// `params` is forwarded to KernelWithParams.
+Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params)
+  : KernelWithParams<PackParams>(std::move(inputs), {output}, params)
+{
+}
+
+// Validates the inputs (count, element type, identical shapes, matching quantization
+// parameters for U8/S8/S16) and resizes the output to the shape of the first input with a
+// new axis of size `values_count` inserted at the normalized axis position.
+void Pack::configure()
+{
+  LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count));
+  // The first input is used as the reference below, so at least one input must exist.
+  LUCI_INTERPRETER_CHECK(_inputs.size() > 0);
+  const Tensor *t0 = _inputs[0];
+  const int dimension_size = t0->shape().num_dims() + 1;
+  int axis = params().axis;
+  if (axis < 0)
+  {
+    // Negative axes count from the end of the output shape.
+    axis += dimension_size;
+  }
+  LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims());
+
+  if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 &&
+      t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 &&
+      t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64)
+  {
+    assert(false && "Unsupported type.");
+  }
+
+  // Every other input must match the first one in element type and shape.
+  for (uint32_t i = 1; i < _inputs.size(); ++i)
+  {
+    const Tensor *tensor = _inputs[i];
+    LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+    LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
+    for (int d = 0; d < t0->shape().num_dims(); ++d)
+    {
+      LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
+    }
+  }
+
+  // Output shape: input dimensions in order, with values_count inserted at `axis`.
+  Shape output_shape(dimension_size);
+  int input_dim = 0;
+  for (int index = 0; index < dimension_size; ++index)
+  {
+    if (index == axis)
+    {
+      output_shape.dim(index) = params().values_count;
+    }
+    else
+    {
+      output_shape.dim(index) = t0->shape().dim(input_dim++);
+    }
+  }
+
+  if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 ||
+      t0->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
+    LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
+    // Guarantee input/output quantization params match as we do not support
+    // packing quantized tensors.
+    for (int value_idx = 0; value_idx < params().values_count; value_idx++)
+    {
+      LUCI_INTERPRETER_CHECK(_inputs[value_idx]->zero_point() == t0->zero_point());
+      LUCI_INTERPRETER_CHECK(_inputs[value_idx]->scale() == t0->scale());
+    }
+  }
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(output_shape);
+}
+
+// Runs evalGeneric<T> for the element type of the first input.
+// Types outside FLOAT32 / U8 / S8 / S16 / S32 / S64 trip a debug assertion.
+void Pack::execute() const
+{
+  switch (_inputs[0]->element_type())
+  {
+    case DataType::S64:
+      evalGeneric<int64_t>();
+      return;
+    case DataType::S32:
+      evalGeneric<int32_t>();
+      return;
+    case DataType::S16:
+      evalGeneric<int16_t>();
+      return;
+    case DataType::S8:
+      evalGeneric<int8_t>();
+      return;
+    case DataType::U8:
+      evalGeneric<uint8_t>();
+      return;
+    case DataType::FLOAT32:
+      evalGeneric<float>();
+      return;
+    default:
+      break;
+  }
+
+  assert(false && "Unsupported type.");
+}
+
+// Packs all inputs of element type T into the output with the TFLite reference kernel.
+// A negative axis parameter is normalized against the output rank (input rank + 1).
+template <typename T> void Pack::evalGeneric() const
+{
+  const int output_rank = _inputs[0]->shape().num_dims() + 1;
+  const int requested_axis = params().axis;
+  const int axis = requested_axis < 0 ? requested_axis + output_rank : requested_axis;
+
+  tflite::PackParams op_params{};
+  op_params.axis = axis;
+  op_params.inputs_count = _inputs.size();
+
+  // Collects the shape and data pointers of every input tensor.
+  VectorOfTensors<T, true> inputs(_inputs);
+  tflite::reference_ops::Pack<T>(op_params, inputs.shapes(), inputs.data(),
+                                 getTensorShape(output()), getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pad.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Constructs the Pad kernel: `input` and `paddings` are forwarded to the Kernel base as the
+// input tensors, `output` as the single output tensor. No other work is done here.
+Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output)
+ : Kernel({input, paddings}, {output})
+{
+}
+
+// Checks the kernel's tensors (debug-only asserts) and resizes the output so every dimension
+// is the input dimension plus its "before" and "after" padding.
+void Pad::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+
+  if (num_dims > 4)
+  {
+    assert(false && "Unsupported number of dimensions.");
+  }
+
+  assert(output()->element_type() == input()->element_type());
+  assert(paddings()->element_type() == DataType::S32);
+  // Paddings shape should be [N, 2].
+  assert(paddings()->shape().num_dims() == 2);
+  assert(paddings()->shape().dim(0) == num_dims);
+  assert(paddings()->shape().dim(1) == 2);
+
+  // Paddings are stored as consecutive (before, after) pairs, one pair per dimension.
+  const auto *pad_pairs = getTensorData<int32_t>(paddings());
+  Shape padded_shape(num_dims);
+  for (int d = 0; d < num_dims; ++d)
+  {
+    const int32_t padding_before = pad_pairs[2 * d];
+    const int32_t padding_after = pad_pairs[2 * d + 1];
+    assert(padding_before >= 0 && padding_after >= 0);
+    padded_shape.dim(d) = input_shape.dim(d) + padding_before + padding_after;
+  }
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(padded_shape);
+}
+
+// Pads the input with the TFLite reference Pad implementation. FLOAT32 is padded with 0.0f;
+// U8 and S8 are padded with the output tensor's zero point.
+void Pad::execute() const
+{
+  const int rank = input()->shape().num_dims();
+
+  tflite::PadParams pad_params{};
+  pad_params.left_padding_count = rank;
+  pad_params.right_padding_count = rank;
+
+  // Each dimension owns one (before, after) pair in the paddings tensor.
+  const auto *pad_pairs = getTensorData<int32_t>(paddings());
+  for (int d = 0; d < rank; ++d)
+  {
+    pad_params.left_padding[d] = pad_pairs[2 * d];
+    pad_params.right_padding[d] = pad_pairs[2 * d + 1];
+  }
+
+  const auto dtype = input()->element_type();
+  if (dtype == DataType::FLOAT32)
+  {
+    const float pad_value = 0.0f;
+    tflite::reference_ops::Pad(pad_params, getTensorShape(input()),
+                               getTensorData<float>(input()), &pad_value,
+                               getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else if (dtype == DataType::U8)
+  {
+    assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+    assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+    const auto pad_value = static_cast<uint8_t>(output()->zero_point());
+    tflite::reference_ops::Pad(pad_params, getTensorShape(input()),
+                               getTensorData<uint8_t>(input()), &pad_value,
+                               getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+  else if (dtype == DataType::S8)
+  {
+    assert(output()->zero_point() >= std::numeric_limits<int8_t>::min());
+    assert(output()->zero_point() <= std::numeric_limits<int8_t>::max());
+    const auto pad_value = static_cast<int8_t>(output()->zero_point());
+    tflite::reference_ops::Pad(pad_params, getTensorShape(input()),
+                               getTensorData<int8_t>(input()), &pad_value,
+                               getTensorShape(output()), getTensorData<int8_t>(output()));
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PadV2.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Constructs the PadV2 kernel: `input`, `paddings` and `constant_values` are forwarded to the
+// Kernel base as the input tensors, `output` as the single output tensor.
+PadV2::PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values,
+ Tensor *output)
+ : Kernel({input, paddings, constant_values}, {output})
+{
+}
+
+// Checks the kernel's tensors (debug-only asserts) and resizes the output so every dimension
+// is the input dimension plus its "before" and "after" padding.
+void PadV2::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+
+  if (num_dims > 4)
+  {
+    assert(false && "Unsupported number of dimensions.");
+  }
+
+  assert(output()->element_type() == input()->element_type());
+  assert(paddings()->element_type() == DataType::S32);
+  assert(constant_values()->element_type() == output()->element_type());
+  // Paddings shape should be [N, 2].
+  assert(paddings()->shape().num_dims() == 2);
+  assert(paddings()->shape().dim(0) == num_dims);
+  assert(paddings()->shape().dim(1) == 2);
+  // Constant values elements number should be 1.
+  assert(constant_values()->shape().num_elements() == 1);
+
+  // Paddings are stored as consecutive (before, after) pairs, one pair per dimension.
+  const auto *pad_pairs = getTensorData<int32_t>(paddings());
+  Shape padded_shape(num_dims);
+  for (int d = 0; d < num_dims; ++d)
+  {
+    const int32_t padding_before = pad_pairs[2 * d];
+    const int32_t padding_after = pad_pairs[2 * d + 1];
+    assert(padding_before >= 0 && padding_after >= 0);
+    padded_shape.dim(d) = input_shape.dim(d) + padding_before + padding_after;
+  }
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(padded_shape);
+}
+
+// Pads the input with the TFLite reference Pad implementation, using the first element of the
+// constant_values tensor as the fill value. Only FLOAT32 and U8 are dispatched.
+void PadV2::execute() const
+{
+  const int rank = input()->shape().num_dims();
+
+  tflite::PadParams pad_params{};
+  pad_params.left_padding_count = rank;
+  pad_params.right_padding_count = rank;
+
+  // Each dimension owns one (before, after) pair in the paddings tensor.
+  const auto *pad_pairs = getTensorData<int32_t>(paddings());
+  for (int d = 0; d < rank; ++d)
+  {
+    pad_params.left_padding[d] = pad_pairs[2 * d];
+    pad_params.right_padding[d] = pad_pairs[2 * d + 1];
+  }
+
+  const auto dtype = input()->element_type();
+  if (dtype == DataType::FLOAT32)
+  {
+    const auto pad_value = getTensorData<float>(constant_values())[0];
+    tflite::reference_ops::Pad(pad_params, getTensorShape(input()),
+                               getTensorData<float>(input()), &pad_value,
+                               getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else if (dtype == DataType::U8)
+  {
+    assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+    assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+    const auto pad_value = getTensorData<uint8_t>(constant_values())[0];
+    tflite::reference_ops::Pad(pad_params, getTensorShape(input()),
+                               getTensorData<uint8_t>(input()), &pad_value,
+                               getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Constructs the Pow kernel: `input1` and `input2` are forwarded to the Kernel base as the
+// input tensors, `output` as the single output tensor.
+Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+// Requires both inputs and the output to share one element type, then resizes the output to
+// the broadcast of the two input shapes.
+void Pow::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(input1()->shape(), input2()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Runs the kernel, dispatching on the element type of the first input (FLOAT32 or S32).
+void Pow::execute() const
+{
+  const auto dtype = input1()->element_type();
+
+  if (dtype == DataType::FLOAT32)
+  {
+    eval<float>();
+  }
+  else if (dtype == DataType::S32)
+  {
+    eval<int32_t>();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Evaluates the kernel for element type T using the TFLite reference implementation.
+// When the two input shapes need broadcasting the 4D broadcasting variant is used, otherwise
+// the plain element-wise one.
+template <typename T> void Pow::eval() const
+{
+  // ProcessBroadcastShapes fills this structure; only its boolean result is consumed below.
+  tflite::ArithmeticParams params{};
+
+  // The last argument must be the address of `params`; the source previously carried a
+  // mis-decoded character here instead of `&params`.
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+                                              getTensorShape(input2()), getTensorData<T>(input2()),
+                                              getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/Utils.h"
+#include "PALQuantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+// Requantizes `input` (elements of type input_dtype) into `output`, whose element type selects
+// the destination integer type (S8, U8 or S16). The rescale factor is input scale divided by
+// output scale.
+template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
+{
+  // Express the real-valued rescale factor as a fixed-point multiplier and a shift.
+  const double effective_output_scale = input->scale() / output->scale();
+  int32_t multiplier;
+  int shift;
+  quantizeMultiplier(effective_output_scale, &multiplier, &shift);
+
+  const auto element_count =
+    tflite::MatchingFlatSize(getTensorShape(input), getTensorShape(output));
+  const auto source = getTensorData<input_dtype>(input);
+
+  const auto out_type = output->element_type();
+  if (out_type == DataType::S8)
+  {
+    luci_interpreter_pal::Requantize(source, element_count, multiplier, shift,
+                                     input->zero_point(), output->zero_point(),
+                                     getTensorData<int8_t>(output));
+  }
+  else if (out_type == DataType::U8)
+  {
+    luci_interpreter_pal::Requantize(source, element_count, multiplier, shift,
+                                     input->zero_point(), output->zero_point(),
+                                     getTensorData<uint8_t>(output));
+  }
+  else if (out_type == DataType::S16)
+  {
+    luci_interpreter_pal::Requantize(source, element_count, multiplier, shift,
+                                     input->zero_point(), output->zero_point(),
+                                     getTensorData<int16_t>(output));
+  }
+  else
+  {
+    assert(false && "Unsupported quantized type, yet!");
+  }
+}
+
+} // namespace
+
+// Constructs the Quantize kernel with one input tensor and one output tensor, both forwarded
+// to the Kernel base.
+Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Checks that the input/output element type combination is supported and resizes the output
+// to the input shape. S16 tensors (input or output) must have a zero point of 0.
+void Quantize::configure()
+{
+  const auto in_type = input()->element_type();
+
+  if (in_type == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+  }
+
+  if (in_type == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8 ||
+                           output()->element_type() == DataType::S8 ||
+                           output()->element_type() == DataType::S16);
+  }
+  else if (in_type == DataType::S16 || in_type == DataType::S8 || in_type == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8 ||
+                           output()->element_type() == DataType::U8 ||
+                           output()->element_type() == DataType::S16);
+    if (output()->element_type() == DataType::S16)
+    {
+      LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+    }
+  }
+  else
+  {
+    assert(false && "Unsupported type");
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+// Runs the kernel. A FLOAT32 input is quantized with the output tensor's scale and zero point;
+// an S16/S8/U8 input is requantized through call_requantize.
+void Quantize::execute() const
+{
+  const auto in_type = input()->element_type();
+
+  if (in_type == DataType::FLOAT32)
+  {
+    tflite::QuantizationParams quant_params;
+    quant_params.zero_point = output()->zero_point();
+    quant_params.scale = output()->scale();
+    const auto source = getTensorData<float>(input());
+
+    // The destination integer type follows the output tensor's element type.
+    switch (output()->element_type())
+    {
+      case DataType::S8:
+        luci_interpreter_pal::Quantize(quant_params, getTensorShape(input()), source,
+                                       getTensorShape(output()), getTensorData<int8_t>(output()));
+        break;
+      case DataType::U8:
+        luci_interpreter_pal::Quantize(quant_params, getTensorShape(input()), source,
+                                       getTensorShape(output()),
+                                       getTensorData<uint8_t>(output()));
+        break;
+      case DataType::S16:
+        luci_interpreter_pal::Quantize(quant_params, getTensorShape(input()), source,
+                                       getTensorShape(output()),
+                                       getTensorData<int16_t>(output()));
+        break;
+      default:
+        assert(false && "Unsupported type.");
+    }
+  }
+  else if (in_type == DataType::S16)
+  {
+    call_requantize<int16_t>(input(), output());
+  }
+  else if (in_type == DataType::S8)
+  {
+    call_requantize<int8_t>(input(), output());
+  }
+  else if (in_type == DataType::U8)
+  {
+    call_requantize<uint8_t>(input(), output());
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Constructs the Relu kernel with one input tensor and one output tensor, both forwarded to
+// the Kernel base.
+Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Validates the tensor types, precomputes the fixed-point rescale factor for the quantized
+// (U8 / S16) paths and resizes the output to the input shape.
+void Relu::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  const auto dtype = input()->element_type();
+  if (dtype == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+  }
+
+  const bool is_quantized = dtype == DataType::U8 || dtype == DataType::S16;
+  if (is_quantized)
+  {
+    // Input scale over output scale, stored as multiplier + shift for use in execute().
+    const double rescale = input()->scale() / output()->scale();
+    quantizeMultiplier(rescale, &_output_multiplier, &_output_shift);
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+// Runs the kernel, dispatching on the input element type (FLOAT32, U8 or S16).
+void Relu::execute() const
+{
+  const auto dtype = input()->element_type();
+
+  if (dtype == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (dtype == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else if (dtype == DataType::S16)
+  {
+    evalQuantizedS16();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float path: hands the input and output buffers and shapes to the platform (PAL) Relu.
+void Relu::evalFloat() const
+{
+  luci_interpreter_pal::Relu(getTensorShape(input()), getTensorData<float>(input()),
+                             getTensorShape(output()), getTensorData<float>(output()));
+}
+
+// U8 path: runs the platform (PAL) ReluX with the rescale factor computed in configure().
+// The lower clamp is the larger of the uint8 minimum and the output zero point; the upper
+// clamp is the uint8 maximum.
+void Relu::evalQuantized() const
+{
+  constexpr auto kTypeMin = static_cast<int32_t>(std::numeric_limits<uint8_t>::min());
+  constexpr auto kTypeMax = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
+
+  tflite::ReluParams relu_params;
+  relu_params.input_offset = input()->zero_point();
+  relu_params.output_offset = output()->zero_point();
+  relu_params.output_multiplier = _output_multiplier;
+  relu_params.output_shift = _output_shift;
+  relu_params.quantized_activation_min = std::max(kTypeMin, relu_params.output_offset);
+  relu_params.quantized_activation_max = kTypeMax;
+
+  luci_interpreter_pal::ReluX(relu_params, getTensorShape(input()),
+                              getTensorData<uint8_t>(input()), getTensorShape(output()),
+                              getTensorData<uint8_t>(output()));
+}
+
+// S16 path: rescales every element with the multiplier/shift computed in configure() and
+// clamps the result to [0, int16 max].
+void Relu::evalQuantizedS16() const
+{
+  constexpr int32_t output_min = 0;
+  constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+  const auto *src = getTensorData<int16_t>(input());
+  auto *dst = getTensorData<int16_t>(output());
+  const int32_t element_count = input()->shape().num_elements();
+
+  for (int32_t idx = 0; idx < element_count; ++idx)
+  {
+    const int32_t raw = src[idx];
+    const int32_t rescaled =
+      tflite::MultiplyByQuantizedMultiplier(raw, _output_multiplier, _output_shift);
+    const int32_t clamped = std::min(std::max(rescaled, output_min), output_max);
+    dst[idx] = static_cast<int16_t>(clamped);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu6.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu6.h"
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Constructs the Relu6 kernel with one input tensor and one output tensor, both forwarded to
+// the Kernel base.
+Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Validates the tensor types, precomputes the fixed-point rescale factor for the U8 path and
+// resizes the output to the input shape.
+void Relu6::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  const bool is_quantized = input()->element_type() == DataType::U8;
+  if (is_quantized)
+  {
+    // Input scale over output scale, stored as multiplier + shift for use in execute().
+    const double rescale = input()->scale() / output()->scale();
+    quantizeMultiplier(rescale, &_output_multiplier, &_output_shift);
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+// Runs the kernel, dispatching on the input element type (FLOAT32 or U8).
+void Relu6::execute() const
+{
+  const auto dtype = input()->element_type();
+
+  if (dtype == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (dtype == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Float path: hands the input and output buffers and shapes to the platform (PAL) Relu6.
+void Relu6::evalFloat() const
+{
+  luci_interpreter_pal::Relu6(getTensorShape(input()), getTensorData<float>(input()),
+                              getTensorShape(output()), getTensorData<float>(output()));
+}
+
+// U8 path: runs the platform (PAL) ReluX with the rescale factor computed in configure().
+// The lower clamp is the larger of the uint8 minimum and the output zero point; the upper
+// clamp is the smaller of the uint8 maximum and the quantized value of 6.0 (output zero point
+// plus 6 / output scale, rounded).
+void Relu6::evalQuantized() const
+{
+  constexpr auto kTypeMin = static_cast<int32_t>(std::numeric_limits<uint8_t>::min());
+  constexpr auto kTypeMax = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
+
+  tflite::ReluParams relu_params;
+  relu_params.input_offset = input()->zero_point();
+  relu_params.output_offset = output()->zero_point();
+  relu_params.output_multiplier = _output_multiplier;
+  relu_params.output_shift = _output_shift;
+
+  relu_params.quantized_activation_min = std::max(kTypeMin, relu_params.output_offset);
+  relu_params.quantized_activation_max =
+    std::min(kTypeMax,
+             relu_params.output_offset + static_cast<int32>(roundf(6.f / output()->scale())));
+
+  luci_interpreter_pal::ReluX(relu_params, getTensorShape(input()),
+                              getTensorData<uint8_t>(input()), getTensorShape(output()),
+                              getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+// Configuration hook for the Reshape operator. Intentionally a no-op: neither `cur_op` nor
+// `runtime_graph` is read.
+void configure_kernel_CircleReshape(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+ // Do nothing
+}
+
+// Executes the Reshape operator for its first input and first output tensor.
+// When `is_inplace` is true the operation is delegated to the runtime graph's
+// makeInplaceOperation; otherwise the input bytes are copied unchanged into the output buffer.
+void execute_kernel_CircleReshape(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+                                  bool is_inplace)
+{
+  const auto input_index = cur_op->inputs()->operator[](0);
+  const auto output_index = cur_op->outputs()->operator[](0);
+
+  assert(input_index != -1);
+  assert(output_index != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(input_index);
+  const auto output = runtime_graph->getCircleTensorByIndex(output_index);
+
+  if (!is_inplace)
+  {
+    const auto input_data = (runtime_graph->getDataByTensor(input));
+    auto output_data = (runtime_graph->getDataByTensor(output));
+
+    assert(input_data != nullptr);
+    assert(output_data != nullptr);
+
+    // Element count times element size gives the number of bytes to copy.
+    const int32_t element_count = Tensor::num_elements(input);
+    const size_t bytes_per_element = getDataTypeSize(Tensor::element_type(input));
+    std::memcpy(output_data, input_data, element_count * bytes_per_element);
+  }
+  else
+  {
+    runtime_graph->makeInplaceOperation(input, output);
+  }
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#if 0
+#include "kernels/Reshape.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture for the Reshape kernel tests. SetUp() installs a new TestMemoryManager that the
+// tests use to create input tensors and allocate the output tensor.
+// NOTE: this whole translation unit is currently compiled out by the surrounding `#if 0`.
+class ReshapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// TODO Test types other than FLOAT32.
+
+// Reshapes a {1, 2, 2, 3} float tensor using an explicit target shape of {3, 4} and expects
+// the output data to equal the input data element for element.
+TEST_F(ReshapeTest, Regular)
+{
+ Shape input_shape{1, 2, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{2};
+ std::vector<int32_t> shape_data{3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ kernel.configure();
+ // The output buffer is allocated only after configure() has run.
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+// Reshapes a {2, 1, 2, 3} float tensor using a target shape of {2, -1, 2} (one entry given as
+// -1) and expects the output data to equal the input data element for element.
+TEST_F(ReshapeTest, UnknownDimension)
+{
+ Shape input_shape{2, 1, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{3};
+ std::vector<int32_t> shape_data{2, -1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ kernel.configure();
+ // The output buffer is allocated only after configure() has run.
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+
+#include "kernels/Utils.h"
+
+#include "PALResizeBilinear.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Constructs the ResizeBilinear kernel: `input` and `size` are forwarded to the base as the
+// input tensors, `output` as the output tensor and `params` as the kernel parameters.
+// The last parameter is a const reference named `params`; the source previously carried a
+// mis-decoded character in place of `&params`.
+ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
+                               const ResizeBilinearParams &params)
+  : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
+{
+}
+
+// Validates the input (4 dimensions) and size (1-D S32 tensor with 2 elements) tensors and
+// resizes the output: dimensions 0 and 3 come from the input, dimensions 1 and 2 from the two
+// values of the size tensor.
+void ResizeBilinear::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+  if (params().half_pixel_centers && params().align_corners)
+  {
+    assert(false && "If half_pixel_centers is True, align_corners must be False.");
+  }
+  LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+
+  const auto *target_size = getTensorData<int32_t>(size());
+  const Shape &in_shape = input()->shape();
+
+  Shape resized_shape(4);
+  resized_shape.dim(0) = in_shape.dim(0);
+  resized_shape.dim(1) = target_size[0];
+  resized_shape.dim(2) = target_size[1];
+  resized_shape.dim(3) = in_shape.dim(3);
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(resized_shape);
+}
+
+// Runs the platform (PAL) ResizeBilinear for the output element type (FLOAT32 or U8), passing
+// through the align_corners and half_pixel_centers kernel parameters.
+void ResizeBilinear::execute() const
+{
+  tflite::ResizeBilinearParams resize_params{};
+  resize_params.align_corners = params().align_corners;
+  resize_params.half_pixel_centers = params().half_pixel_centers;
+
+  const auto dtype = output()->element_type();
+  if (dtype == DataType::FLOAT32)
+  {
+    luci_interpreter_pal::ResizeBilinear(
+      resize_params, getTensorShape(input()), getTensorData<float>(input()),
+      getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()),
+      getTensorData<float>(output()));
+  }
+  else if (dtype == DataType::U8)
+  {
+    luci_interpreter_pal::ResizeBilinear(
+      resize_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+      getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()),
+      getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+#include "PALResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Constructs the ResizeNearestNeighbor kernel: `input` and `size` are forwarded to the base as
+// the input tensors, `output` as the output tensor and `params` as the kernel parameters.
+// The last parameter is a const reference named `params`; the source previously carried a
+// mis-decoded character in place of `&params`.
+ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
+                                             Tensor *output,
+                                             const ResizeNearestNeighborParams &params)
+  : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
+{
+}
+
+// Validates the input (4 dimensions) and size (1-D S32 tensor with 2 elements) tensors and
+// resizes the output: dimensions 0 and 3 come from the input, dimensions 1 and 2 from the two
+// values of the size tensor.
+void ResizeNearestNeighbor::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+
+  const auto *target_size = getTensorData<int32_t>(size());
+  const Shape &in_shape = input()->shape();
+
+  Shape resized_shape(4);
+  resized_shape.dim(0) = in_shape.dim(0);
+  resized_shape.dim(1) = target_size[0];
+  resized_shape.dim(2) = target_size[1];
+  resized_shape.dim(3) = in_shape.dim(3);
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(resized_shape);
+}
+
+// Runs nearest-neighbor resizing for the output element type, passing through the
+// align_corners and half_pixel_centers kernel parameters. FLOAT32 goes through the TFLite
+// reference implementation, U8 through the platform (PAL) implementation.
+void ResizeNearestNeighbor::execute() const
+{
+ tflite::ResizeNearestNeighborParams op_params{};
+ op_params.align_corners = params().align_corners;
+ op_params.half_pixel_centers = params().half_pixel_centers;
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ // NOTE(review): the FLOAT32 data is accessed through int32_t pointers here. Presumably
+ // this relies on both types having the same element width and on the reference kernel
+ // only copying elements - confirm before changing.
+ tflite::reference_ops::ResizeNearestNeighbor(
+ op_params, getTensorShape(input()), getTensorData<int32_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::ResizeNearestNeighbor(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ assert(false && "Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Constructs the ReverseV2 kernel: `input` and `axes` are forwarded to the Kernel base as the
+// input tensors, `output` as the single output tensor.
+ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output)
+ : Kernel({input, axes}, {output})
+{
+}
+
+// Checks the kernel's tensors (debug-only asserts) and resizes the output to the input shape.
+// The axes tensor must be 1-D, of type S32, hold at most one element, and that element must
+// be a valid non-negative dimension index of the input.
+void ReverseV2::configure()
+{
+  assert(axes()->shape().num_dims() == 1);
+  assert(input()->shape().num_dims() >= axes()->shape().num_elements());
+
+  const auto in_type = input()->element_type();
+  const bool input_type_ok = in_type == DataType::S32 || in_type == DataType::FLOAT32 ||
+                             in_type == DataType::U8 || in_type == DataType::S16 ||
+                             in_type == DataType::S64;
+  if (!input_type_ok)
+  {
+    assert(false && "Unsupported input type.");
+  }
+  if (!(axes()->element_type() == DataType::S32))
+  {
+    assert(false && "Unsupported axes type.");
+  }
+  if (axes()->shape().num_elements() > 1)
+  {
+    assert(false && "Current implementation does not support more than 1 axis.");
+  }
+
+  const int axis_value = getTensorData<int32_t>(axes())[0];
+  const bool axis_in_range = axis_value >= 0 && axis_value < input()->shape().num_dims();
+  if (!axis_in_range)
+  {
+    assert(false && "Invalid axes value");
+  }
+  assert(input()->element_type() == output()->element_type());
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+// Reverses the input along the single axis stored in the axes tensor, using the TFLite
+// reference Reverse implementation for the output element type.
+// configure() accepts FLOAT32, U8, S16, S32 and S64 inputs, so every one of those types is
+// dispatched here; previously only FLOAT32 and U8 were handled and the other accepted types
+// fell through to the "unsupported" branch.
+void ReverseV2::execute() const
+{
+  const int axis_value = getTensorData<int32_t>(axes())[0];
+  switch (output()->element_type())
+  {
+    case DataType::FLOAT32:
+      tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
+                                            getTensorData<float>(input()),
+                                            getTensorShape(output()),
+                                            getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      tflite::reference_ops::Reverse<uint8_t>(axis_value, getTensorShape(input()),
+                                              getTensorData<uint8_t>(input()),
+                                              getTensorShape(output()),
+                                              getTensorData<uint8_t>(output()));
+      break;
+    case DataType::S16:
+      tflite::reference_ops::Reverse<int16_t>(axis_value, getTensorShape(input()),
+                                              getTensorData<int16_t>(input()),
+                                              getTensorShape(output()),
+                                              getTensorData<int16_t>(output()));
+      break;
+    case DataType::S32:
+      tflite::reference_ops::Reverse<int32_t>(axis_value, getTensorShape(input()),
+                                              getTensorData<int32_t>(input()),
+                                              getTensorShape(output()),
+                                              getTensorData<int32_t>(output()));
+      break;
+    case DataType::S64:
+      tflite::reference_ops::Reverse<int64_t>(axis_value, getTensorShape(input()),
+                                              getTensorData<int64_t>(input()),
+                                              getTensorShape(output()),
+                                              getTensorData<int64_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported output type");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/Utils.h"
+
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `input` as the only kernel input and `output` as the only kernel output.
+Rsqrt::Rsqrt(const Tensor *input, Tensor *output)
+  : Kernel({input}, {output})
+{
+  // No additional setup is required here.
+}
+
+// Asserts that input and output share an element type, then sizes the output like the input.
+void Rsqrt::configure()
+{
+  const bool same_type = input()->element_type() == output()->element_type();
+  if (!same_type)
+  {
+    assert(false && "Input/output tensor data type mismatch.");
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto &in_shape = input()->shape();
+  output()->resize(in_shape);
+}
+
+// Dispatches on the input element type; FLOAT32 is the only type handled.
+void Rsqrt::execute() const
+{
+  if (input()->element_type() == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Writes 1 / sqrt(x) for every element of the flattened input into the output buffer.
+void Rsqrt::evalFloat() const
+{
+  auto src = getTensorData<float>(input());
+  auto dst = getTensorData<float>(output());
+  const auto count = getTensorShape(input()).FlatSize();
+
+  // Walk both buffers in lock-step until the end of the input is reached.
+  const auto src_end = src + count;
+  while (src != src_end)
+  {
+    *dst = 1.f / std::sqrt(*src);
+    ++src;
+    ++dst;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/Utils.h"
+#include "PALSVDF.h"
+
+#include <tensorflow/lite/kernels/internal/quantization_util.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+/**
+ * @brief Map a fused activation function onto the matching TfLiteFusedActivation value.
+ *
+ * @param activation Fused activation requested for the operator.
+ * @return The corresponding kTfLiteAct* constant. An unknown activation trips an assert; when
+ *         assert() is compiled out, kTfLiteActNone is returned so the function never runs off
+ *         its end without a return value.
+ */
+TfLiteFusedActivation get_tflite_activation(Activation activation)
+{
+  switch (activation)
+  {
+    case FusedActFunc::RELU:
+      return kTfLiteActRelu;
+    case FusedActFunc::RELU6:
+      return kTfLiteActRelu6;
+    case FusedActFunc::RELU_N1_TO_1:
+      return kTfLiteActReluN1To1;
+    case FusedActFunc::TANH:
+      return kTfLiteActTanh;
+    case FusedActFunc::SIGN_BIT:
+      return kTfLiteActSignBit;
+    case FusedActFunc::NONE:
+      return kTfLiteActNone;
+    default:
+      assert(false && "Unsupported activation type");
+      // Flowing off the end of a non-void function is undefined behaviour, so keep a defined
+      // result for builds where NDEBUG removes the assert above.
+      return kTfLiteActNone;
+  }
+}
+} // namespace
+
+/**
+ * @brief Build the SVDF kernel.
+ *
+ * Inputs are registered in the order: input, weight_feature, weight_time, bias,
+ * input_activation_state. Outputs are registered in the order: output,
+ * scratchpad_activation_state, scratchpad_1 ... scratchpad_6, so the scratchpads sit at
+ * output indices 1..7.
+ */
+SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+           const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+           Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+           Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+           const SVDFParams &params)
+  : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
+                                 {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
+                                  scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
+                                 params)
+{
+  // Do nothing
+}
+
+/**
+ * @brief Validate operand types and shapes, then size the output and scratchpad tensors.
+ *
+ * Supported type combinations are all-S8 (integer) and all-FLOAT32; the hybrid combination
+ * (FLOAT32 input with U8/S8 feature weights) is rejected. The ranks of weight_feature and
+ * weight_time are checked before any of their dimensions is read.
+ */
+void SVDF::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const Shape &weight_features_shape = weight_feature()->shape();
+  const Shape &weight_time_shape = weight_time()->shape();
+
+  // Validate Input Tensor:
+  LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32 ||
+                         input()->element_type() == DataType::S8);
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
+
+  // Validate inputs and output types
+  if (input()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == DataType::S16 ||
+                           weight_time()->element_type() == DataType::S8);
+    if (bias())
+    {
+      LUCI_INTERPRETER_CHECK(bias()->element_type() == DataType::S32);
+    }
+
+    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == DataType::S16 ||
+                           input_activation_state()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
+
+    // Note: tflite currently supports only ReLU activation for integer SVDF
+    LUCI_INTERPRETER_CHECK(params().activation == FusedActFunc::RELU);
+  }
+  else if (weight_feature()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == DataType::FLOAT32);
+    if (bias())
+    {
+      LUCI_INTERPRETER_CHECK(bias()->element_type() == DataType::FLOAT32);
+    }
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+  }
+  else if ((weight_feature()->element_type() == DataType::U8 ||
+            weight_feature()->element_type() == DataType::S8) &&
+           input()->element_type() == DataType::FLOAT32)
+  {
+    // TODO: support hybrid SVDF op
+    assert(false && "Hybrid type is not currently supported");
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+
+  // Validate the weight ranks first: dim(0)/dim(1) of both tensors are read right below.
+  LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
+
+  // Check all the parameters of tensor match within themselves and match the
+  // input configuration.
+  const int rank = params().svdf_rank;
+  const int batch_size = input_shape.dim(0);
+  const int num_filters = weight_features_shape.dim(0);
+  LUCI_INTERPRETER_CHECK(rank != 0);
+  LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
+
+  const int num_units = num_filters / rank;
+  const int memory_size = weight_time_shape.dim(1);
+
+  // Validate Weight_Feature Input Tensor:
+  LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
+
+  // Validate Weight_Time Input Tensor:
+  LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
+
+  // Validate Bias
+  if (bias())
+  {
+    LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
+  }
+
+  // Validate Input Activation State
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
+
+  // Resize scratchpad_state to input_activation_state
+  auto scratchpad_activation_state = getOutputTensors()[1];
+  scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
+
+  // TODO: enable it only if kernel with dynamic shapes
+  // Resize output tensor
+  output()->resize({batch_size, num_units});
+
+  luci_interpreter_pal::SetupScratchpadTensor(
+    input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
+    getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
+    getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
+}
+
+// Selects the evaluation routine from the feature-weight element type:
+// FLOAT32 weights -> evalFloat(); S8 weights with S8 input -> evalInteger().
+// Every other combination ends in an assert.
+void SVDF::execute() const
+{
+  const auto weights_type = weight_feature()->element_type();
+
+  if (weights_type == DataType::FLOAT32)
+  {
+    evalFloat();
+    return;
+  }
+
+  if (weights_type == DataType::S8)
+  {
+    if (input()->element_type() == DataType::S8)
+    {
+      evalInteger();
+    }
+    else
+    {
+      // TODO:: support hybrid SVDF op
+      assert(false && "Hybrid type is not currently supported");
+    }
+    return;
+  }
+
+  assert(false && "Unsupported type");
+}
+
+// Integer SVDF evaluation: derives the two quantized rescale factors, zeroes the
+// activation-state scratchpad and forwards everything to luci_interpreter_pal::IntegerSVDF.
+void SVDF::evalInteger() const
+{
+ // scale 1 = input scale * feature-weight scale / activation-state scale
+ const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
+ input_activation_state()->scale());
+ // scale 2 = activation-state scale * time-weight scale / output scale
+ const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
+ weight_time()->scale() / output()->scale());
+
+ int32_t effective_scale_1_a;
+ int effective_scale_1_b;
+ int32_t effective_scale_2_a;
+ int effective_scale_2_b;
+
+ // Each double scale is converted into the integer pair (_a, _b) that IntegerSVDF consumes.
+ tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
+ tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
+
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = get_tflite_activation(params().activation);
+
+ // Output tensor 1 is the activation-state scratchpad (see the constructor's output order).
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ // Note: it is expected that activation_state input variable tensor reset to zero,
+ // also expected that this variable tensor doesn't have buffer
+ // NOTE(review): the state is accessed as int16_t here although configure() also admits an
+ // S8 activation state - confirm that S8 state is never routed through this path.
+ auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ // Output tensors 2 and 3 are handed to the PAL as int32 working buffers.
+ auto scratchpad = getOutputTensors()[2];
+ auto output_temp = getOutputTensors()[3];
+
+ int32_t input_zp = input()->zero_point();
+ int32_t output_zp = output()->zero_point();
+ luci_interpreter_pal::IntegerSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
+ getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
+ getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
+ getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
+ effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
+}
+
+// Float SVDF evaluation: zeroes the activation-state scratchpad (output tensor 1) and calls
+// luci_interpreter_pal::FloatSVDF with output tensor 2 as its float scratch buffer.
+void SVDF::evalFloat() const
+{
+  TfLiteSVDFParams tfl_params{};
+  tfl_params.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+  tfl_params.rank = params().svdf_rank;
+  tfl_params.activation = get_tflite_activation(params().activation);
+
+  // Note: it is expected that activation_state input variable tensor reset to zero,
+  // also expected that this variable tensor doesn't have buffer
+  auto state_tensor = getOutputTensors()[1];
+  auto state_data = getTensorData<float>(state_tensor);
+  std::fill_n(state_data, state_tensor->shape().num_elements(), 0);
+
+  auto scratch_tensor = getOutputTensors()[2];
+
+  luci_interpreter_pal::FloatSVDF(
+    tfl_params, getTensorShape(input()), getTensorData<float>(input()),
+    getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
+    getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
+    getTensorData<float>(bias()), getTensorData<float>(scratch_tensor), state_data,
+    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `input` as the only kernel input and `output` as the only kernel output and
+// stores the operator parameters.
+ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params)
+  : KernelWithParams<ShapeParams>({input}, {output}, params)
+{
+}
+
+// Checks that the output is S32 or S64 and resizes it to a rank-1 tensor holding one entry
+// per input dimension.
+void ShapeKernel::configure()
+{
+  const auto out_type = output()->element_type();
+  LUCI_INTERPRETER_CHECK(out_type == DataType::S32 || out_type == DataType::S64);
+
+  Shape result_shape(1);
+  result_shape.dim(0) = input()->shape().num_dims();
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(result_shape);
+}
+
+// Dispatches on the `out_type` parameter: S32 and S64 are handled, anything else asserts.
+void ShapeKernel::execute() const
+{
+  const auto out_type = params().out_type;
+  if (out_type == DataType::S32)
+  {
+    evalInt<int32_t>();
+  }
+  else if (out_type == DataType::S64)
+  {
+    evalInt<int64_t>();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Copies every dimension of the input shape into the output buffer, converted to T.
+template <typename T> void ShapeKernel::evalInt() const
+{
+  const auto dims = input()->shape();
+  auto dst = getTensorData<T>(output());
+
+  int axis = 0;
+  while (axis < dims.num_dims())
+  {
+    dst[axis] = dims.dim(axis);
+    ++axis;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "Utils.h"
+#include "PALSlice.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+// Rank limit for Slice inputs: Slice::configure() asserts the input rank against it and
+// Slice::execute() pads its begin/size vectors up to this many entries.
+const int max_dim = 4;
+
+// Registers `input`, `begin` and `size` as kernel inputs and `output` as the kernel output.
+Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
+  : Kernel({input, begin, size}, {output})
+{
+  // No additional setup is required here.
+}
+
+/**
+ * @brief Compute the shape produced by slicing `input` with the given `begin` / `size` data.
+ *
+ * For every input dimension the output extent is `size[idx]`, except that a size of -1 means
+ * "everything from begin[idx] to the end of the dimension". Invalid requests (begin outside
+ * [0, dim], a negative size other than -1, or begin + size past the dimension) trip an assert.
+ *
+ * @tparam T Element type used to read the `begin` and `size` tensors.
+ * @return Shape with the same rank as `input`.
+ */
+template <typename T>
+Shape calculateOutputShape(const Tensor *input, const Tensor *begin, const Tensor *size)
+{
+  const auto &input_shape = input->shape();
+  const int rank = input_shape.num_dims();
+  const auto *begin_data = getTensorData<T>(begin);
+  const auto *size_data = getTensorData<T>(size);
+
+  Shape output_shape = Shape(rank);
+  for (int idx = 0; idx < rank; idx++)
+  {
+    const T dim = input_shape.dim(idx);
+    const T begin_value = begin_data[idx];
+    T size_value = size_data[idx];
+
+    // A begin outside the dimension would give a negative extent for size == -1 and an
+    // out-of-range start offset otherwise.
+    if (begin_value < 0 || begin_value > dim)
+    {
+      assert(false && "Invalid begin.");
+    }
+
+    if (size_value < 0)
+    {
+      if (size_value != -1)
+      {
+        assert(false && "Invalid size.");
+      }
+      size_value = dim - begin_value;
+    }
+    else
+    {
+      if (dim < begin_value + size_value)
+      {
+        assert(false && "Invalid begin and size.");
+      }
+    }
+    output_shape.dim(idx) = static_cast<int>(size_value);
+  }
+  return output_shape;
+}
+
+// Appends the first `dimensions` begin/size values to `begins` / `sizes`, walking from the
+// last dimension down to the first (i.e. in reversed order).
+template <typename T>
+void getBeginAndSizeVectors(int dimensions, const Tensor *begin, const Tensor *size,
+                            std::vector<int> *begins, std::vector<int> *sizes)
+{
+  int idx = dimensions;
+  while (idx-- > 0)
+  {
+    begins->push_back(getTensorData<T>(begin)[idx]);
+    sizes->push_back(getTensorData<T>(size)[idx]);
+  }
+}
+
+/**
+ * @brief Validate the operands and resize the output to the sliced shape.
+ *
+ * `begin` and `size` must be rank-1 tensors of the same integer type (S32 or S64) with one
+ * entry per input dimension; the input rank may not exceed max_dim. Violations are reported
+ * through assert().
+ */
+void Slice::configure()
+{
+  assert(input()->element_type() == output()->element_type());
+  assert(begin()->element_type() == DataType::S32 || begin()->element_type() == DataType::S64);
+  assert(size()->element_type() == DataType::S32 || size()->element_type() == DataType::S64);
+  // Both tensors are read with the element type of `begin`, so the types have to agree.
+  assert(begin()->element_type() == size()->element_type());
+  assert(begin()->shape().num_dims() == 1);
+  assert(size()->shape().num_dims() == 1);
+  assert(input()->shape().num_dims() <= max_dim);
+  // One begin/size entry is read per input dimension.
+  assert(begin()->shape().num_elements() == input()->shape().num_dims());
+  assert(size()->shape().num_elements() == input()->shape().num_dims());
+  // TODO: enable it only if kernel with dynamic shapes
+  if (begin()->element_type() == DataType::S32)
+  {
+    output()->resize(calculateOutputShape<int32_t>(input(), begin(), size()));
+  }
+  else if (begin()->element_type() == DataType::S64)
+  {
+    output()->resize(calculateOutputShape<int64_t>(input(), begin(), size()));
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Builds 4-entry begin/size parameter arrays (missing leading dimensions get begin 0 and
+// size 1) and runs the PAL slice for FLOAT32, U8 or S8 inputs.
+void Slice::execute() const
+{
+  const int rank = input()->shape().num_dims();
+
+  std::vector<int> begins;
+  begins.reserve(max_dim);
+  std::vector<int> sizes;
+  sizes.reserve(max_dim);
+
+  // The helper stores the values in reversed dimension order.
+  switch (begin()->element_type())
+  {
+    case DataType::S32:
+      getBeginAndSizeVectors<int32_t>(rank, begin(), size(), &begins, &sizes);
+      break;
+    case DataType::S64:
+      getBeginAndSizeVectors<int64_t>(rank, begin(), size(), &begins, &sizes);
+      break;
+    default:
+      assert(false && "Unsupported begin type.");
+  }
+
+  // Pad up to max_dim entries.
+  for (int pad = rank; pad < max_dim; ++pad)
+  {
+    begins.push_back(0);
+    sizes.push_back(1);
+  }
+
+  assert(begins.size() == 4);
+  assert(sizes.size() == 4);
+
+  tflite::SliceParams op_params{};
+  op_params.begin_count = 4;
+  op_params.size_count = 4;
+  // Undo the reversal so the padding ends up in the leading positions.
+  for (int dst = 0, src = 3; dst < 4; ++dst, --src)
+  {
+    op_params.begin[dst] = begins[src];
+    op_params.size[dst] = sizes[src];
+  }
+
+  const auto in_type = input()->element_type();
+  if (in_type == DataType::FLOAT32)
+  {
+    luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData<float>(input()),
+                                getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else if (in_type == DataType::U8)
+  {
+    luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+                                getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                getTensorData<uint8_t>(output()));
+  }
+  else if (in_type == DataType::S8)
+  {
+    luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+                                getTensorData<int8_t>(input()), getTensorShape(output()),
+                                getTensorData<int8_t>(output()));
+  }
+  else
+  {
+    assert(false && "Unsupported input type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include "PALSoftmax.h"
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+#ifndef DIS_FLOAT
+// Float softmax: fetches both data buffers from the runtime graph and runs the TFLite
+// reference Softmax with the beta taken from the operator options.
+void evalFloat(const circle::Tensor *input, const circle::Tensor *output,
+               const circle::SoftmaxOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  const auto *src = runtime_graph->getDataByTensor(input);
+  auto *dst = runtime_graph->getDataByTensor(output);
+
+  tflite::SoftmaxParams softmax_params{};
+  softmax_params.beta = options->beta();
+
+  const auto in_shape = kernels::getTensorShape(input);
+  const auto out_shape = kernels::getTensorShape(output);
+  tflite::reference_ops::Softmax(softmax_params, in_shape, kernels::getTensorData<float>(src),
+                                 out_shape, kernels::getTensorData<float>(dst));
+}
+#endif // DIS_FLOAT
+
+#ifndef DIS_QUANT
+// Quantized softmax for element type T. Currently disabled: the leading assert fires before
+// anything else when asserts are enabled.
+template <typename T>
+void evalQuantized(const circle::Tensor *input, const circle::Tensor *output,
+                   const circle::SoftmaxOptions *options, BaseRuntimeGraph *runtime_graph)
+{
+  // TODO: Enable it
+  assert(false && "Not impl yet");
+
+  const auto *src = runtime_graph->getDataByTensor(input);
+  auto *dst = runtime_graph->getDataByTensor(output);
+
+  // Parameters are derived from the input scale and the beta option.
+  tflite::SoftmaxParams softmax_params{};
+  luci_interpreter_pal::InitializeParams(&softmax_params, Tensor::scale(input), options->beta());
+
+  luci_interpreter_pal::Softmax(softmax_params, kernels::getTensorShape(input),
+                                kernels::getTensorData<T>(src), kernels::getTensorShape(output),
+                                kernels::getTensorData<T>(dst));
+}
+#endif
+
+} // namespace
+
+// Validates a Softmax operator: input and output must share an element type and the input
+// needs at least one dimension. With quantization enabled, a U8 output must have zero point 0
+// and an S8 output must have the minimum int8_t value as zero point.
+void configure_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
+{
+  const auto in_idx = (*cur_op->inputs())[0];
+  const auto out_idx = (*cur_op->outputs())[0];
+
+  assert(in_idx != -1);
+  assert(out_idx != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(in_idx);
+  auto output = runtime_graph->getCircleTensorByIndex(out_idx);
+
+  assert(input != nullptr);
+  assert(output != nullptr);
+
+  LUCI_INTERPRETER_CHECK(Tensor::element_type(input) == Tensor::element_type(output));
+  LUCI_INTERPRETER_CHECK(Tensor::num_dims(input) >= 1);
+
+#ifndef DIS_QUANT
+  if (Tensor::element_type(input) == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(Tensor::zero_point(output) == 0);
+  }
+  else if (Tensor::element_type(input) == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(Tensor::zero_point(output) == std::numeric_limits<int8_t>::min());
+  }
+#endif
+}
+
+// Runs a Softmax operator: resolves the first input/output tensors of `cur_op` and dispatches
+// on the input element type. The unnamed bool argument is ignored.
+void execute_kernel_CircleSoftmax(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph,
+                                  bool)
+{
+  const auto in_idx = (*cur_op->inputs())[0];
+  const auto out_idx = (*cur_op->outputs())[0];
+
+  assert(in_idx != -1);
+  assert(out_idx != -1);
+
+  const auto input = runtime_graph->getCircleTensorByIndex(in_idx);
+  auto output = runtime_graph->getCircleTensorByIndex(out_idx);
+
+  assert(input != nullptr);
+  assert(output != nullptr);
+
+  const auto *softmax_options = cur_op->builtin_options_as_SoftmaxOptions();
+  const auto in_type = Tensor::element_type(input);
+
+  switch (in_type)
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+      evalFloat(input, output, softmax_options, runtime_graph);
+      break;
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::S8:
+      evalQuantized<int8_t>(input, output, softmax_options, runtime_graph);
+      break;
+    case DataType::U8:
+      evalQuantized<uint8_t>(input, output, softmax_options, runtime_graph);
+      break;
+#endif // DIS_QUANT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// TODO enable it
+#if 0
+#include "kernels/Softmax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Compile-time mapping from a C++ element type to its loco::DataType tag.
+// Only the specializations below (float, uint8_t, int8_t) are defined.
+template <typename T> constexpr loco::DataType toLocoDataType();
+
+// float -> FLOAT32
+template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; }
+
+// uint8_t -> U8
+template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; }
+
+// int8_t -> S8
+template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; }
+
+// Floating-point overload: runs Softmax (beta = 0.1) on `input_data` and compares the result
+// and its shape against the expectations.
+template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor in_tensor =
+    makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get());
+  Tensor out_tensor = makeOutputTensor(toLocoDataType<T>());
+
+  SoftmaxParams softmax_params{};
+  softmax_params.beta = 0.1;
+
+  Softmax softmax_kernel(&in_tensor, &out_tensor, softmax_params);
+  softmax_kernel.configure();
+  // The output buffer is allocated only after configure() has run.
+  memory_manager->allocate_memory(out_tensor);
+  softmax_kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(out_tensor), FloatArrayNear(output_data));
+  EXPECT_THAT(extractTensorShape(out_tensor), output_shape);
+}
+
+// Integral overload: quantizes input and output over ranges that always include 0, runs
+// Softmax (beta = 0.1) and compares the dequantized result within one output scale step.
+template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  // (scale, zero point) pairs for both tensors.
+  std::pair<float, int32_t> in_quant =
+    quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f),
+                          std::max<float>(std::max<float>(input_data), 0.f));
+  std::pair<float, int32_t> out_quant =
+    quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f),
+                          std::max<float>(std::max<float>(output_data), 0.f));
+
+  Tensor in_tensor = makeInputTensor<toLocoDataType<T>()>(
+    input_shape, in_quant.first, in_quant.second, input_data, memory_manager.get());
+  Tensor out_tensor = makeOutputTensor(toLocoDataType<T>(), out_quant.first, out_quant.second);
+
+  SoftmaxParams softmax_params{};
+  softmax_params.beta = 0.1;
+
+  Softmax softmax_kernel(&in_tensor, &out_tensor, softmax_params);
+  softmax_kernel.configure();
+  // The output buffer is allocated only after configure() has run.
+  memory_manager->allocate_memory(out_tensor);
+  softmax_kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(out_tensor), FloatArrayNear(output_data, out_tensor.scale()));
+}
+
+// Empty typed-test fixture; the element type under test is supplied as T.
+template <typename T> class SoftmaxTest : public ::testing::Test
+{
+};
+
+// Element types the typed tests are instantiated for.
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
+
+// Softmax over a {2, 1, 2, 3} tensor. Each expected row is the softmax of the matching
+// 3-element input row scaled by beta = 0.1 (the value Check() configures).
+TYPED_TEST(SoftmaxTest, Simple)
+{
+ Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3},
+ {
+ 5, -9, 8, //
+ -7, 2, -4, //
+ 1, -2, 9, //
+ 3, -6, -1, //
+ },
+ {
+ 0.38514, 0.09497, 0.51989, //
+ 0.20792, 0.51141, 0.28067, //
+ 0.25212, 0.18678, 0.56110, //
+ 0.48149, 0.19576, 0.32275, //
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/Utils.h"
+
+#include "PALSpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+// Inclusive bounds on the input rank accepted by SpaceToBatchND::configure().
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+
+} // namespace
+
+// Registers `input`, `block_shape` and `paddings` as kernel inputs and `output` as the
+// kernel output.
+SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape,
+                               const Tensor *paddings, Tensor *output)
+  : Kernel({input, block_shape, paddings}, {output})
+{
+  // No additional setup is required here.
+}
+
+/**
+ * @brief Validate the operands and compute the output shape.
+ *
+ * The input must have rank 3 or 4; `block_shape` must be rank 1 with one entry per spatial
+ * dimension (input rank - 2) and `paddings` must be [spatial dims, 2]. Every padded spatial
+ * extent has to be divisible by its (strictly positive) block size. The output batch is the
+ * input batch multiplied by all block sizes; the last dimension is copied unchanged.
+ */
+void SpaceToBatchND::configure()
+{
+  const auto &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+
+  LUCI_INTERPRETER_CHECK(num_dims >= kInputMinDimensionNum);
+  LUCI_INTERPRETER_CHECK(num_dims <= kInputMaxDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  const int spatial_dims_num = num_dims - 2;
+
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+  LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num);
+  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2);
+
+  const auto *block_shape_data = block_shape()->data<int32_t>();
+  const auto *paddings_data = paddings()->data<int32_t>();
+
+  Shape output_shape = Shape(num_dims);
+  int output_batch_size = input_shape.dim(0);
+  for (int i = 0; i < spatial_dims_num; ++i)
+  {
+    // The block size is used as a divisor below, so zero (or a negative value) must be
+    // rejected before the modulo is evaluated.
+    LUCI_INTERPRETER_CHECK(block_shape_data[i] > 0);
+
+    const int final_dim_size =
+      (input_shape.dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]);
+    LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0);
+    output_shape.dim(i + 1) = final_dim_size / block_shape_data[i];
+    output_batch_size = output_batch_size * block_shape_data[i];
+  }
+  output_shape.dim(0) = output_batch_size;
+  output_shape.dim(num_dims - 1) = input_shape.dim(num_dims - 1);
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(output_shape);
+}
+
+/**
+ * @brief Run the PAL SpaceToBatchND for FLOAT32 or U8 inputs.
+ *
+ * The padding value (`output_offset`) is 0 for float data and the output zero point for U8
+ * data. Other element types trip an assert.
+ */
+void SpaceToBatchND::execute() const
+{
+  // Declared ahead of the switch and value-initialised, so no case label jumps over the
+  // declaration and no field is left indeterminate.
+  tflite::SpaceToBatchParams op_params{};
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      op_params.output_offset = 0;
+      luci_interpreter_pal::SpaceToBatchND(
+        op_params, getTensorShape(input()), getTensorData<float>(input()),
+        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+        getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      op_params.output_offset = output()->zero_point();
+      luci_interpreter_pal::SpaceToBatchND(
+        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+        getTensorData<uint8_t>(output()));
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SpaceToDepth.h"
+#include "Utils.h"
+#include "PALSpaceToDepth.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `input` as the only kernel input and `output` as the only kernel output and
+// stores the operator parameters.
+SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params)
+  : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
+{
+}
+
+/**
+ * @brief Validate the operands and compute the output shape.
+ *
+ * The input must be rank 4 and share its element type with the output. Dimensions 1 and 2
+ * must be exact multiples of the (strictly positive) block size; they shrink by that factor
+ * while dimension 3 grows by block_size * block_size. Violations are reported through assert().
+ */
+void SpaceToDepth::configure()
+{
+  assert(input()->shape().num_dims() == 4);
+  assert(output()->element_type() == DataType::FLOAT32 ||
+         output()->element_type() == DataType::U8 || output()->element_type() == DataType::S8 ||
+         output()->element_type() == DataType::S32 || output()->element_type() == DataType::S64);
+  assert(input()->element_type() == output()->element_type());
+
+  const int block_size = params().block_size;
+  // block_size is used as a divisor right below.
+  assert(block_size > 0);
+
+  const int32_t input_height = input()->shape().dim(1);
+  const int32_t input_width = input()->shape().dim(2);
+  int32_t output_height = input_height / block_size;
+  int32_t output_width = input_width / block_size;
+
+  assert(input_height == output_height * block_size);
+  assert(input_width == output_width * block_size);
+
+  Shape output_shape(4);
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = output_height;
+  output_shape.dim(2) = output_width;
+  output_shape.dim(3) = input()->shape().dim(3) * block_size * block_size;
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(output_shape);
+}
+
+// Runs the PAL SpaceToDepth with the configured block size; FLOAT32 and U8 inputs are
+// handled, every other element type asserts.
+void SpaceToDepth::execute() const
+{
+  tflite::SpaceToDepthParams pal_params{};
+  pal_params.block_size = params().block_size;
+
+  const auto in_type = input()->element_type();
+  if (in_type == DataType::FLOAT32)
+  {
+    luci_interpreter_pal::SpaceToDepth(pal_params, getTensorShape(input()),
+                                       getTensorData<float>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+  }
+  else if (in_type == DataType::U8)
+  {
+    luci_interpreter_pal::SpaceToDepth(pal_params, getTensorShape(input()),
+                                       getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                       getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Split.h"
+
+#include "Utils.h"
+
+#include "PALSplit.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `axis` and `input` (in that order) as the kernel inputs and takes ownership of the
+// list of output tensor pointers by moving it into the base Kernel.
+Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs)
+ : Kernel({axis, input}, std::move(outputs))
+{
+}
+
+// Resolves the split axis (negative values count from the last dimension) and resizes every
+// output to the input shape with the axis extent divided evenly among the outputs.
+void Split::configure()
+{
+  assert(axis()->shape().num_elements() == 1);
+
+  const int32_t requested_axis = getTensorData<int32_t>(axis())[0];
+  _axis_value =
+    requested_axis < 0 ? requested_axis + input()->shape().num_dims() : requested_axis;
+  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+  const int32_t axis_extent = input()->shape().dim(_axis_value);
+  // The axis must divide evenly among the outputs.
+  assert(axis_extent % _outputs.size() == 0);
+  const int32_t per_output_extent = axis_extent / _outputs.size();
+
+  // TODO: enable it only if kernel with dynamic shapes
+  Shape slice_shape = input()->shape();
+  slice_shape.dim(_axis_value) = per_output_extent;
+  for (Tensor *slice : _outputs)
+  {
+    slice->resize(slice_shape);
+  }
+}
+
+// Splits the input along _axis_value into the output tensors through luci_interpreter_pal::Split.
+// FLOAT32 and U8 inputs are handled; any other element type trips the assert.
+void Split::execute() const
+{
+  tflite::SplitParams split_params{};
+  split_params.num_split = _outputs.size();
+  split_params.axis = _axis_value;
+
+// Expands to one typed call; the macro is local to this function and undefined at its end.
+#define TF_LITE_SPLIT(scalar)                                                                  \
+  {                                                                                            \
+    VectorOfTensors<scalar, false> typed_outputs(_outputs);                                    \
+    luci_interpreter_pal::Split(split_params, getTensorShape(input()),                         \
+                                getTensorData<scalar>(input()), typed_outputs.shapes(),        \
+                                typed_outputs.data());                                         \
+  }
+
+  const auto type = input()->element_type();
+  if (type == DataType::FLOAT32)
+  {
+    TF_LITE_SPLIT(float);
+  }
+  else if (type == DataType::U8)
+  {
+    TF_LITE_SPLIT(uint8_t);
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitV.h"
+
+#include "Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `input`, `size_splits` and `axis` (in that order) as the kernel inputs and moves the
+// list of output tensor pointers into the base Kernel.
+SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+ std::vector<Tensor *> outputs)
+ : Kernel({input, size_splits, axis}, std::move(outputs))
+{
+}
+
+// Resolves the split axis and the per-output sizes, then resizes every output.
+//
+// size_splits must be a 1-D tensor with one entry per output. At most one entry may be -1;
+// that output receives what remains of the axis once all explicitly sized outputs are
+// subtracted from the input extent.
+void SplitV::configure()
+{
+  assert(axis()->shape().num_elements() == 1);
+  _axis_value = getTensorData<int32_t>(axis())[0];
+  if (_axis_value < 0)
+    _axis_value += input()->shape().num_dims();
+  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+  auto num_split = static_cast<int32_t>(_outputs.size());
+  auto sizes_data = getTensorData<int32_t>(size_splits());
+
+  assert(size_splits()->shape().num_dims() == 1);
+  // One size per output; checked before the entries are interpreted.
+  assert(size_splits()->shape().num_elements() == num_split);
+
+  int32_t sum = 0;
+  const auto num_size_splits = size_splits()->shape().dim(0);
+  int32_t count_neg_dim = 0;
+
+  // Every entry has to be visited, including the last one: stopping one short would leave the
+  // final explicit size out of `sum` (so a -1 placed earlier gets too large an extent) and
+  // would let a trailing second -1 slip past the check below.
+  for (int32_t i = 0; i < num_size_splits; ++i)
+  {
+    if (sizes_data[i] != -1)
+    {
+      sum += sizes_data[i];
+    }
+    else
+    {
+      count_neg_dim++;
+    }
+  }
+  assert(count_neg_dim < 2);
+
+  // TODO: enable it only if kernel with dynamic shapes
+  auto output_shape = input()->shape();
+  for (int32_t i = 0; i < num_split; ++i)
+  {
+    if (sizes_data[i] == -1)
+    {
+      // The inferred slice takes whatever the explicit sizes leave over.
+      output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum;
+    }
+    else
+    {
+      output_shape.dim(_axis_value) = sizes_data[i];
+    }
+    _outputs[i]->resize(output_shape);
+  }
+}
+
+// Splits the input along _axis_value into the already-sized outputs using
+// tflite::optimized_ops::Split. FLOAT32, U8 and S16 inputs are handled; any other element
+// type trips the assert.
+void SplitV::execute() const
+{
+  tflite::SplitParams split_params{};
+  split_params.num_split = _outputs.size();
+  split_params.axis = _axis_value;
+
+// Expands to one typed call; the macro is local to this function and undefined at its end.
+#define TF_LITE_SPLIT(scalar)                                                                  \
+  {                                                                                            \
+    VectorOfTensors<scalar, false> typed_outputs(_outputs);                                    \
+    tflite::optimized_ops::Split(split_params, getTensorShape(input()),                        \
+                                 getTensorData<scalar>(input()), typed_outputs.shapes(),       \
+                                 typed_outputs.data());                                        \
+  }
+
+  const auto type = input()->element_type();
+  if (type == DataType::FLOAT32)
+  {
+    TF_LITE_SPLIT(float);
+  }
+  else if (type == DataType::U8)
+  {
+    TF_LITE_SPLIT(uint8_t);
+  }
+  else if (type == DataType::S16)
+  {
+    TF_LITE_SPLIT(int16_t);
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/Utils.h"
+
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Element-wise square root kernel over one input and one output tensor.
+Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Asserts that input and output share an element type, then gives the output the input's shape.
+void Sqrt::configure()
+{
+  const bool same_type = input()->element_type() == output()->element_type();
+  if (!same_type)
+  {
+    assert(false && "Input/output tensor data type mismatch.");
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+// Dispatches on the input element type; only FLOAT32 is implemented.
+void Sqrt::execute() const
+{
+  if (input()->element_type() == DataType::FLOAT32)
+  {
+    evalFloat();
+    return;
+  }
+
+  assert(false && "Unsupported type.");
+}
+
+// Writes std::sqrt of every input element to the matching output position.
+void Sqrt::evalFloat() const
+{
+  const float *src = getTensorData<float>(input());
+  float *dst = getTensorData<float>(output());
+  const float *const src_end = src + getTensorShape(input()).FlatSize();
+
+  while (src != src_end)
+  {
+    *dst++ = std::sqrt(*src++);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/Utils.h"
+
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Element-wise squaring kernel over one input and one output tensor.
+Square::Square(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Asserts that input and output share an element type, then gives the output the input's shape.
+void Square::configure()
+{
+  const bool same_type = input()->element_type() == output()->element_type();
+  if (!same_type)
+  {
+    assert(false && "Input/output tensor data type mismatch.");
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+// Dispatches on the input element type; only FLOAT32 is implemented.
+void Square::execute() const
+{
+  if (input()->element_type() == DataType::FLOAT32)
+  {
+    evalFloat();
+    return;
+  }
+
+  assert(false && "Unsupported type.");
+}
+
+// Writes the square of every input element to the matching output position.
+void Square::evalFloat() const
+{
+  const float *src = getTensorData<float>(input());
+  float *dst = getTensorData<float>(output());
+  const float *const src_end = src + getTensorShape(input()).FlatSize();
+
+  while (src != src_end)
+  {
+    *dst = (*src) * (*src);
+    ++src;
+    ++dst;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Binary kernel: `input1` and `input2` are the operands, `output` receives the result.
+SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+// Requires both inputs and the output to share one element type, then resizes the output to
+// the shape returned by calculateShapeForBroadcast for the two input shapes.
+void SquaredDifference::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(input1()->shape(), input2()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Dispatches on the element type of the first input; only FLOAT32 is implemented.
+void SquaredDifference::execute() const
+{
+  if (input1()->element_type() == DataType::FLOAT32)
+  {
+    evalSquaredDifference<float>();
+    return;
+  }
+
+  assert(false && "Unsupported type.");
+}
+
+// Applies (x - y)^2 to the two inputs through BinaryOpBroadcastSlow, writing into the output.
+template <typename T> inline void SquaredDifference::evalSquaredDifference() const
+{
+  const auto squared_difference = [](T x, T y) {
+    const T delta = x - y;
+    return delta * delta;
+  };
+
+  BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+                        getTensorShape(input2()), getTensorData<T>(input2()),
+                        getTensorShape(output()), getTensorData<T>(output()), squared_difference);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Builds the kernel over one input and one output tensor. `params` is forwarded to
+// KernelWithParams; configure() reads its squeeze_dims list back through params().
+Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params)
+  : KernelWithParams<SqueezeParams>({input}, {output}, params)
+{
+}
+
+// Computes the squeezed output shape and resizes the output.
+// With an empty squeeze_dims list every dimension of extent 1 is removed; otherwise only the
+// listed dimensions are removed (negative entries count from the last dimension).
+void Squeeze::configure()
+{
+  const int rank = input()->shape().num_dims();
+  const int requested = params().squeeze_dims.size();
+  assert(rank <= 8);
+
+  bool drop[8] = {false};
+  int dropped = 0;
+  if (requested != 0)
+  {
+    for (int i = 0; i < requested; ++i)
+    {
+      int dim_index = params().squeeze_dims[i];
+      if (dim_index < 0)
+      {
+        dim_index += rank;
+      }
+      assert(dim_index >= 0 && dim_index < rank && input()->shape().dim(dim_index) == 1);
+      // A dimension listed twice is only counted once.
+      if (drop[dim_index])
+      {
+        continue;
+      }
+      drop[dim_index] = true;
+      ++dropped;
+    }
+  }
+  else
+  {
+    for (int d = 0; d < rank; ++d)
+    {
+      if (input()->shape().dim(d) != 1)
+      {
+        continue;
+      }
+      drop[d] = true;
+      ++dropped;
+    }
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  Shape squeezed(rank - dropped);
+  int out_pos = 0;
+  for (int in_pos = 0; in_pos < rank; ++in_pos)
+  {
+    if (drop[in_pos])
+    {
+      continue;
+    }
+    squeezed.dim(out_pos) = input()->shape().dim(in_pos);
+    ++out_pos;
+  }
+  output()->resize(squeezed);
+}
+
+// Squeezing does not reorder elements, so the input buffer is copied to the output verbatim.
+void Squeeze::execute() const
+{
+  assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+  const auto byte_count =
+    getDataTypeSize(input()->element_type()) * input()->shape().num_elements();
+  const auto *src = input()->data<void>();
+  auto *dst = output()->data<void>();
+  std::memcpy(dst, src, byte_count);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/StridedSlice.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/strided_slice.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `input`, `begin`, `end` and `strides` (in that order) as the kernel inputs and
+// `output` as its only output. `params` carries the mask fields that configure() and execute()
+// read back through params().
+StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end,
+                           const Tensor *strides, Tensor *output, const StridedSliceParams &params)
+  : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
+{
+}
+
+// Validates the operands, derives the output shape from begin/end/strides and the masks, and
+// resizes the output tensor accordingly.
+//
+// begin, end and strides must be 1-D S32 tensors, the input may have at most 4 dimensions, and
+// ellipsis_mask / new_axis_mask must be zero (they are not implemented).
+void StridedSlice::configure()
+{
+ assert(begin()->shape().num_dims() == 1);
+ assert(end()->shape().num_dims() == 1);
+ assert(strides()->shape().num_dims() == 1);
+ assert(input()->element_type() == output()->element_type());
+ assert(begin()->element_type() == DataType::S32);
+ assert(end()->element_type() == DataType::S32);
+ assert(strides()->element_type() == DataType::S32);
+ assert(input()->shape().num_dims() <= 4);
+ if (params().ellipsis_mask != 0)
+ {
+ assert(false && "ellipsis_mask is not implemented yet.");
+ }
+ if (params().new_axis_mask != 0)
+ {
+ assert(false && "new_axis_mask is not implemented yet.");
+ }
+ // For U8 tensors the input and output must share scale and zero point.
+ if (input()->element_type() == DataType::U8)
+ {
+ assert(input()->scale() == output()->scale());
+ assert(input()->zero_point() == output()->zero_point());
+ }
+ // Mirror the operands into the tflite parameter struct used by the StartForAxis /
+ // StopForAxis helpers below.
+ tflite::StridedSliceParams op_params{};
+ op_params.start_indices_count = input()->shape().num_dims();
+ op_params.stop_indices_count = input()->shape().num_dims();
+ op_params.strides_count = input()->shape().num_dims();
+
+ for (int i = 0; i < input()->shape().num_dims(); i++)
+ {
+ op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
+ op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
+ op_params.strides[i] = getTensorData<int32_t>(strides())[i];
+ }
+ op_params.begin_mask = params().begin_mask;
+ op_params.ellipsis_mask = 0;
+ op_params.end_mask = params().end_mask;
+ op_params.new_axis_mask = 0;
+ op_params.shrink_axis_mask = params().shrink_axis_mask;
+ // Axes are visited from the last to the first, so this vector holds the output extents in
+ // reverse order; it is flipped when copied into the Shape further down.
+ std::vector<int32_t> output_shape_vector;
+ for (int i = 0; i < input()->shape().num_dims(); i++)
+ {
+ int idx = input()->shape().num_dims() - i - 1;
+ int32_t stride = getTensorData<int32_t>(strides())[idx];
+ assert(stride != 0);
+ int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx);
+ int32_t end =
+ ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
+
+ // A shrunk axis selects exactly one element and is left out of the output shape.
+ const bool shrink_axis = params().shrink_axis_mask & (1 << idx);
+ if (shrink_axis)
+ {
+ end = begin + 1;
+ }
+
+ // Number of strided steps in [begin, end), rounded up; a negative count is clamped to 0.
+ int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
+ dim_shape = dim_shape < 0 ? 0 : dim_shape;
+ if (!shrink_axis)
+ {
+ output_shape_vector.push_back(dim_shape);
+ }
+ }
+ // TODO: enable it only if kernel with dynamic shapes
+ Shape output_shape = Shape(output_shape_vector.size());
+ for (size_t i = 0; i < output_shape_vector.size(); i++)
+ {
+ // Undo the reverse ordering produced by the loop above.
+ output_shape.dim(i) = output_shape_vector[output_shape_vector.size() - i - 1];
+ }
+ output()->resize(output_shape);
+}
+
+// Rebuilds the tflite parameter struct from the begin/end/strides tensors and the masks, then
+// runs the reference StridedSlice for FLOAT32, U8 or S32 inputs. Any other element type trips
+// the assert.
+void StridedSlice::execute() const
+{
+  const int rank = input()->shape().num_dims();
+
+  tflite::StridedSliceParams op_params{};
+  op_params.start_indices_count = rank;
+  op_params.stop_indices_count = rank;
+  op_params.strides_count = rank;
+  for (int axis = 0; axis < rank; ++axis)
+  {
+    op_params.start_indices[axis] = getTensorData<int32_t>(begin())[axis];
+    op_params.stop_indices[axis] = getTensorData<int32_t>(end())[axis];
+    op_params.strides[axis] = getTensorData<int32_t>(strides())[axis];
+  }
+
+  // The two unsupported masks are always passed as zero.
+  op_params.ellipsis_mask = 0;
+  op_params.new_axis_mask = 0;
+  op_params.begin_mask = params().begin_mask;
+  op_params.end_mask = params().end_mask;
+  op_params.shrink_axis_mask = params().shrink_axis_mask;
+
+  const auto type = input()->element_type();
+  if (type == DataType::FLOAT32)
+  {
+    tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+                                        getTensorData<float>(input()), getTensorShape(output()),
+                                        getTensorData<float>(output()));
+  }
+  else if (type == DataType::U8)
+  {
+    tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+                                        getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                        getTensorData<uint8_t>(output()));
+  }
+  else if (type == DataType::S32)
+  {
+    tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+                                        getTensorData<int32_t>(input()), getTensorShape(output()),
+                                        getTensorData<int32_t>(output()));
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sub.h"
+#include "kernels/Utils.h"
+
+#include "PALSub.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Binary kernel computing input1 - input2 into `output`. `params` is forwarded to
+// KernelWithParams; the eval* methods read its activation field.
+Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params)
+  : KernelWithParams<SubParams>({input1, input2}, {output}, params)
+{
+}
+
+// Requires both inputs and the output to share one element type, then resizes the output to
+// the shape returned by calculateShapeForBroadcast for the two input shapes.
+void Sub::configure()
+{
+  LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()))
+  LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type()))
+
+  // TODO: enable it only if kernel with dynamic shapes
+  const auto broadcast_shape = calculateShapeForBroadcast(input1()->shape(), input2()->shape());
+  output()->resize(broadcast_shape);
+}
+
+// Dispatches on the element type of the first input: FLOAT32, S64, S32 and U8 are implemented;
+// any other type trips the assert.
+void Sub::execute() const
+{
+  const auto type = input1()->element_type();
+  if (type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (type == DataType::S64)
+  {
+    evalInteger<int64_t>();
+  }
+  else if (type == DataType::S32)
+  {
+    evalInteger<int32_t>();
+  }
+  else if (type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// FLOAT32 subtraction with the activation range taken from _params.activation.
+// When ProcessBroadcastShapes reports that the operand shapes need broadcasting, the reference
+// BroadcastSubSlow is used; otherwise the PAL Sub implementation runs.
+void Sub::evalFloat() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<float>(params, _params.activation);
+
+  // ProcessBroadcastShapes receives a pointer to the parameter struct.
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastSubSlow(
+      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+  }
+  else
+  {
+    luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
+                              getTensorShape(input2()), getTensorData<float>(input2()),
+                              getTensorShape(output()), getTensorData<float>(output()));
+  }
+}
+
+// Integer subtraction for element type T with the activation range taken from
+// _params.activation. Uses the reference BroadcastSubSlow when ProcessBroadcastShapes reports
+// that broadcasting is needed, and the reference Sub otherwise.
+template <typename T> void Sub::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  // ProcessBroadcastShapes receives a pointer to the parameter struct.
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastSubSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
+// U8 quantized subtraction.
+// Both input scales are expressed relative to twice the larger of the two, the result is
+// rescaled to the output scale, and each real multiplier is converted to a fixed-point
+// multiplier/shift pair before the reference kernel is called.
+void Sub::evalQuantized() const
+{
+  const auto input1_scale = static_cast<double>(input1()->scale());
+  const auto input2_scale = static_cast<double>(input2()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  const int left_shift = 20;
+  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+  int input1_shift{}, input2_shift{}, output_shift{};
+  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ArithmeticParams params{};
+  params.left_shift = left_shift;
+  // The kernel expects inputs' zero points to be negated.
+  params.input1_offset = -input1()->zero_point(); // Note the '-'.
+  params.input1_multiplier = input1_multiplier;
+  params.input1_shift = input1_shift;
+  params.input2_offset = -input2()->zero_point(); // Note the '-'.
+  params.input2_multiplier = input2_multiplier;
+  params.input2_shift = input2_shift;
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  // ProcessBroadcastShapes receives a pointer to the parameter struct.
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastSubSlow(
+      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+                               getTensorShape(output()), getTensorData<uint8_t>(output()));
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/tanh.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Element-wise tanh kernel over one input and one output tensor.
+Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Requires matching input/output element types, prepares the lookup table for U8 inputs, and
+// gives the output the input's shape.
+void Tanh::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  const bool is_quantized = input()->element_type() == DataType::U8;
+  if (is_quantized)
+  {
+    populateLookupTable();
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(input()->shape());
+}
+
+// Dispatches on the input element type: FLOAT32 and U8 are implemented; any other type trips
+// the assert.
+void Tanh::execute() const
+{
+  const auto type = input()->element_type();
+  if (type == DataType::FLOAT32)
+  {
+    evalFloat();
+  }
+  else if (type == DataType::U8)
+  {
+    evalQuantized();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+// Delegates the FLOAT32 case to the tflite reference Tanh.
+void Tanh::evalFloat() const
+{
+  const float *src = getTensorData<float>(input());
+  float *dst = getTensorData<float>(output());
+  tflite::reference_ops::Tanh(getTensorShape(input()), src, getTensorShape(output()), dst);
+}
+
+// Maps every U8 input element through the table filled by populateLookupTable().
+void Tanh::evalQuantized() const
+{
+  const int count = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+  const uint8_t *src = getTensorData<uint8_t>(input());
+  uint8_t *dst = getTensorData<uint8_t>(output());
+
+  int pos = 0;
+  while (pos < count)
+  {
+    dst[pos] = getTableValue(src[pos]);
+    ++pos;
+  }
+}
+
+// Precomputes the quantized tanh result for every possible uint8 input value.
+// Each value is dequantized with the input scale/zero point, passed through std::tanh,
+// requantized with the output scale/zero point (rounded), clamped to the uint8 range and
+// handed to setTableValue together with the source value.
+void Tanh::populateLookupTable()
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+ const auto output_scale = static_cast<double>(output()->scale());
+ const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+ // Computed in double, then narrowed to float.
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ // val is an int32_t so the loop can terminate after the last uint8 value.
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ const float transformed = std::tanh(dequantized);
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ // First argument is the clamped table entry, second the input value it belongs to
+ // (presumably the table index; confirm against setTableValue's declaration).
+ setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+ static_cast<uint8_t>(val));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+using ::testing::FloatNear;
+using ::testing::Matcher;
+
+// Creates an output tensor of the given element type; the shape, quantization and name
+// arguments are all passed empty.
+Tensor makeOutputTensor(DataType element_type) { return Tensor(element_type, {}, {}, ""); }
+
+// Creates an output tensor of the given element type with a single scale / zero point pair;
+// the shape and name arguments are passed empty.
+Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point)
+{
+ return Tensor(element_type, {}, {{scale}, {zero_point}}, "");
+}
+
+// Converts the quantized contents of `tensor` to floats.
+//
+// U8 and S8 tensors are dequantized with the tensor's scale and zero point. S16 tensors are
+// treated as symmetric (all zero points must be 0) and may carry either one scale or one scale
+// per slice of the quantized dimension. Any other element type trips the assert; when asserts
+// are compiled out an empty vector is returned.
+std::vector<float> dequantizeTensorData(const Tensor &tensor)
+{
+  if (tensor.element_type() == DataType::U8)
+  {
+    std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor);
+    return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+  }
+  else if (tensor.element_type() == DataType::S8)
+  {
+    std::vector<int8_t> data = extractTensorData<int8_t>(tensor);
+    return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+  }
+  else if (tensor.element_type() == DataType::S16)
+  {
+    // S16 quantization is symmetric, so zero point should be zero.
+    for (auto zp : tensor.zero_points())
+    {
+      (void)zp;
+      assert(zp == 0);
+    }
+
+    std::vector<int16_t> data = extractTensorData<int16_t>(tensor);
+    if (tensor.scales().size() == 1)
+    {
+      return dequantize(data.data(), data.size(), tensor.scale(), 0);
+    }
+
+    // The quantized dimension breaks the shape into two parts:
+    // inner dimensions, which hold contiguous data sharing one scale, and
+    // outer dimensions, which cover everything before the quantized dimension.
+    const Shape shape = tensor.shape();
+    const int32_t quantized_dimension = tensor.quantized_dimension();
+    assert(quantized_dimension < shape.num_dims());
+    size_t outer_dims_size = 1;
+    int32_t quant_dim_size = shape.dim(quantized_dimension);
+    size_t inner_dims_size = 1;
+    assert(quant_dim_size == tensor.scales().size());
+
+    for (int i = 0; i < quantized_dimension; ++i)
+      outer_dims_size *= shape.dim(i);
+    for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
+      inner_dims_size *= shape.dim(i);
+
+    assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
+
+    std::vector<float> dequantized_data;
+    dequantized_data.reserve(shape.num_elements());
+    for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
+      for (int32_t channel = 0; channel < quant_dim_size; ++channel)
+      {
+        // Each (outer, channel) pair owns one contiguous run of inner_dims_size elements.
+        float scale = tensor.scales()[channel];
+        size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
+        std::vector<float> part_dequantized_data =
+          dequantize(data.data() + offset, inner_dims_size, scale, 0);
+        dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(),
+                                part_dequantized_data.end());
+      }
+    return dequantized_data;
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+    // With asserts compiled out, control would otherwise flow off the end of a non-void
+    // function, which is undefined behaviour.
+    return {};
+  }
+}
+
+// Builds a matcher that expects a float vector whose i-th element is within `max_abs_error`
+// of `values[i]`.
+Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error)
+{
+  std::vector<Matcher<float>> per_element;
+  per_element.reserve(values.size());
+  for (size_t i = 0; i < values.size(); ++i)
+  {
+    per_element.emplace_back(FloatNear(values[i], max_abs_error));
+  }
+  return ElementsAreArray(per_element);
+}
+
+// Returns the tensor's dimensions as a plain vector, in dimension order.
+std::vector<int32_t> extractTensorShape(const Tensor &tensor)
+{
+  const int rank = tensor.shape().num_dims();
+  std::vector<int32_t> extents(rank);
+  for (int d = 0; d < rank; ++d)
+  {
+    extents[d] = tensor.shape().dim(d);
+  }
+  return extents;
+}
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Transpose.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `input` and `perm` (in that order) as the kernel inputs and `output` as its only
+// output.
+Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output)
+ : Kernel({input, perm}, {output})
+{
+}
+
+// Checks the operands and resizes the output so that output dim i equals input dim perm[i].
+void Transpose::configure()
+{
+  const Shape &in_shape = input()->shape();
+  const int rank = in_shape.num_dims();
+  const int32_t *permutation = getTensorData<int32_t>(perm());
+
+  // Transpose op only supports 1D-4D input arrays.
+  assert(rank <= 4);
+  assert(input()->element_type() == output()->element_type());
+
+  // The permutation is a 1-D tensor holding one entry per input dimension.
+  assert(perm()->shape().num_dims() == 1);
+  assert(perm()->shape().dim(0) == rank);
+
+  Shape transposed(rank);
+  for (int axis = 0; axis < rank; ++axis)
+  {
+    assert(permutation[axis] >= 0 && permutation[axis] < rank);
+    transposed.dim(axis) = in_shape.dim(permutation[axis]);
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(transposed);
+}
+
+// Runs the TFLite reference transpose for the input element type (FLOAT32 or U8).
+void Transpose::execute() const
+{
+  // Copy the permutation tensor into the parameter block used by the reference kernel.
+  const int32_t perm_size = perm()->shape().dim(0);
+  const int32_t *permutation = getTensorData<int32_t>(perm());
+
+  tflite::TransposeParams transpose_params{};
+  transpose_params.perm_count = perm_size;
+  for (int idx = 0; idx < perm_size; ++idx)
+  {
+    transpose_params.perm[idx] = permutation[idx];
+  }
+
+  const DataType type = input()->element_type();
+  if (type == DataType::FLOAT32)
+  {
+    tflite::reference_ops::Transpose(transpose_params, getTensorShape(input()),
+                                     getTensorData<float>(input()), getTensorShape(output()),
+                                     getTensorData<float>(output()));
+  }
+  else if (type == DataType::U8)
+  {
+    tflite::reference_ops::Transpose(transpose_params, getTensorShape(input()),
+                                     getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                     getTensorData<uint8_t>(output()));
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers {output_shape, filter, input, bias} as inputs and {output, scratch_tensor} as
+// outputs, and forwards the operator parameters to KernelWithParams.
+TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+                             const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+                             const TransposeConvParams &params)
+  : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
+                                          {output, scratch_tensor}, params)
+{
+}
+
+// The destructor is defined in this translation unit so that the vector of quantized
+// multipliers is deleted properly.
+TransposeConv::~TransposeConv() = default;
+
+// Validates the operands, resizes the output to the shape stored in the output_shape tensor,
+// computes the padding and prepares the scratch tensor / quantized multipliers.
+void TransposeConv::configure()
+{
+  assert(output_shape()->shape().num_dims() == 1);
+  assert(input()->shape().num_dims() == 4);
+  assert(filter()->shape().num_dims() == 4);
+  assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
+         input()->element_type() == DataType::S16);
+  assert(input()->element_type() == output()->element_type());
+  assert(input()->shape().dim(3) == filter()->shape().dim(3));
+
+  // The requested output shape is read from the output_shape tensor, one entry per dimension.
+  const int rank = output_shape()->shape().dim(0);
+  const auto *requested_dims = getTensorData<int32_t>(output_shape());
+  Shape out_shape(rank);
+  for (int axis = 0; axis < rank; ++axis)
+  {
+    out_shape.dim(axis) = requested_dims[axis];
+  }
+  // TODO: enable it only if kernel with dynamic shapes
+  output()->resize(out_shape);
+
+  const int32_t filter_height = filter()->shape().dim(1);
+  const int32_t filter_width = filter()->shape().dim(2);
+  const int32_t output_height = out_shape.dim(1);
+  const int32_t output_width = out_shape.dim(2);
+
+  // These sizes are only needed as inputs of computePadding.
+  const int32_t unused_output_height =
+    computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
+  const int32_t unused_output_width =
+    computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
+
+  _padding_height =
+    computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
+  _padding_width =
+    computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
+
+  auto scratch = getOutputTensors()[1];
+  const bool is_quantized =
+    input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16;
+  if (!is_quantized)
+  {
+    // The non-quantized path does not use the scratch tensor.
+    scratch->set_allocatable(false);
+    return;
+  }
+
+  // Quantized paths accumulate into a scratch tensor with the same shape as the output.
+  scratch->resize(output()->shape());
+  const std::vector<double> real_multipliers =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  _quant_multipliers = quantizeMultipliers(real_multipliers);
+}
+
+// Dispatches to the evaluation routine matching the input element type. For U8 the number of
+// filter scales selects between the per-tensor and the per-channel implementation.
+void TransposeConv::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+    {
+      const size_t num_scales = filter()->scales().size();
+      if (num_scales == 1)
+      {
+        evalQuantized();
+      }
+      else if (num_scales > 1)
+      {
+        // Per-channel quantization: one scale per output channel (filter dim 0).
+        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+        LUCI_INTERPRETER_CHECK(num_scales == static_cast<size_t>(filter()->shape().dim(0)));
+        evalQuantizedPerChannel();
+      }
+      else
+      {
+        // No scale at all: report it instead of silently leaving the output unwritten.
+        assert(false && "Filter tensor has no quantization scales.");
+      }
+      break;
+    }
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+// Float path: fills the convolution parameters from the configured padding and strides and
+// calls the TFLite reference TransposeConv (no im2col buffer is passed).
+void TransposeConv::evalFloat() const
+{
+  tflite::ConvParams conv_params{};
+  conv_params.stride_width = params().stride_width;
+  conv_params.stride_height = params().stride_height;
+  conv_params.padding_values.width = _padding_width;
+  conv_params.padding_values.height = _padding_height;
+  conv_params.padding_type = tflite::PaddingType::kSame;
+
+  tflite::reference_ops::TransposeConv(conv_params,                                              //
+                                       getTensorShape(input()), getTensorData<float>(input()),   //
+                                       getTensorShape(filter()), getTensorData<float>(filter()), //
+                                       getTensorShape(bias()), getTensorData<float>(bias()),     //
+                                       getTensorShape(output()), getTensorData<float>(output()), //
+                                       tflite::RuntimeShape(), nullptr);
+}
+
+// Per-tensor quantized U8 path: uses the first (only) prepared multiplier and the TFLite
+// reference TransposeConv, with the second output tensor as the int32 accumulation buffer.
+void TransposeConv::evalQuantized() const
+{
+  tflite::ConvParams conv_params{};
+
+  // Geometry.
+  conv_params.padding_type = tflite::PaddingType::kSame;
+  conv_params.padding_values.height = _padding_height;
+  conv_params.padding_values.width = _padding_width;
+  conv_params.stride_height = params().stride_height;
+  conv_params.stride_width = params().stride_width;
+
+  // Quantization. The kernel expects input and filter zero points to be negated.
+  conv_params.input_offset = -input()->zero_point();    // Note the '-'.
+  conv_params.weights_offset = -filter()->zero_point(); // Note the '-'.
+  conv_params.output_offset = output()->zero_point();
+  conv_params.output_multiplier = _quant_multipliers[0].multiplier;
+  conv_params.output_shift = _quant_multipliers[0].shift;
+
+  // Clamp to the full uint8 range.
+  conv_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
+  conv_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
+
+  auto accumulator = getOutputTensors()[1];
+
+  tflite::reference_ops::TransposeConv(
+    conv_params,                                                //
+    getTensorShape(input()), getTensorData<uint8_t>(input()),   //
+    getTensorShape(filter()), getTensorData<uint8_t>(filter()), //
+    getTensorShape(bias()), getTensorData<int32_t>(bias()),     //
+    getTensorShape(output()), getTensorData<uint8_t>(output()), //
+    tflite::RuntimeShape(), nullptr,                            //
+    getTensorData<int32_t>(accumulator));
+}
+
+// Per-channel quantized U8 transposed convolution.
+//
+// Pass 1 scatters every (input sample x filter tap) product into the int32 scratch buffer,
+// which is laid out like the output. Pass 2, run per batch, adds the optional bias, rescales
+// each accumulator with the multiplier of its output channel, adds the output zero point,
+// clamps to the activation range and stores the result in the output tensor.
+void TransposeConv::evalQuantizedPerChannel() const
+{
+  const auto *input_data = getTensorData<uint8_t>(input());
+  const auto *filter_data = getTensorData<uint8_t>(filter());
+  const auto *bias_data = getTensorData<int32_t>(bias());
+  auto *output_data = getTensorData<uint8_t>(output());
+
+  auto scratch_tensor = getOutputTensors()[1];
+  auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t output_depth = filter_shape.dim(0);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+
+  // Zero points are constant for the whole evaluation; read them once instead of calling the
+  // accessors inside the innermost loops.
+  const auto input_zero_point = input()->zero_point();
+  const auto &filter_zero_points = filter()->zero_points();
+  const auto output_zero_point = output()->zero_point();
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+  // The scratch buffer is accumulated into, so it has to start from zero.
+  std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
+
+  BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t in_y = 0; in_y < input_height; ++in_y)
+    {
+      // Top-left output coordinate touched by this input row / column.
+      const int32_t out_y_origin = in_y * stride_height - _padding_height;
+      for (int32_t in_x = 0; in_x < input_width; ++in_x)
+      {
+        const int32_t out_x_origin = in_x * stride_width - _padding_width;
+        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+        {
+          // The zero-point-corrected input sample is shared by every filter tap and output
+          // channel below, so load it once.
+          const uint8_t input_val = input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+          const int32_t input_centered = static_cast<int32_t>(input_val - input_zero_point);
+
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            const int32_t out_y = out_y_origin + filter_y;
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t out_x = out_x_origin + filter_x;
+              // Taps that fall outside the output are dropped.
+              if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+              {
+                for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+                {
+                  const uint8_t filter_val =
+                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+                  scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+                    input_centered * static_cast<int32_t>(filter_val - filter_zero_points[out_c]);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          const auto out_offset = calcOffset(output_shape, batch, out_y, out_x, out_c);
+
+          int32_t acc = scratch_data[out_offset];
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+          scaled_acc += output_zero_point;
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+
+          output_data[out_offset] = scaled_acc;
+        }
+      }
+    }
+  }
+}
+
+// Quantized S16 transposed convolution.
+//
+// Pass 1 scatters every (input sample x filter tap) product into the int64 scratch buffer,
+// which is laid out like the output; no zero points are applied on this path. Pass 2, run per
+// batch, adds the optional int64 bias, rescales each accumulator with the multiplier of its
+// output channel, clamps to the activation range and stores the result.
+void TransposeConv::evalQuantizedS16() const
+{
+  const auto *input_data = getTensorData<int16_t>(input());
+  const auto *filter_data = getTensorData<int16_t>(filter());
+  const auto *bias_data = getTensorData<int64_t>(bias());
+  auto *output_data = getTensorData<int16_t>(output());
+
+  auto scratch_tensor = getOutputTensors()[1];
+  auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t output_depth = filter_shape.dim(0);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+  // The scratch buffer is accumulated into, so it has to start from zero.
+  std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
+
+  BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t in_y = 0; in_y < input_height; ++in_y)
+    {
+      // Top-left output coordinate touched by this input row / column.
+      const int32_t out_y_origin = in_y * stride_height - _padding_height;
+      for (int32_t in_x = 0; in_x < input_width; ++in_x)
+      {
+        const int32_t out_x_origin = in_x * stride_width - _padding_width;
+        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+        {
+          // The input sample is shared by every filter tap and output channel below, so load
+          // and widen it once.
+          const int64_t input_val =
+            static_cast<int64_t>(input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]);
+
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            const int32_t out_y = out_y_origin + filter_y;
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t out_x = out_x_origin + filter_x;
+              // Taps that fall outside the output are dropped.
+              if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+              {
+                for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+                {
+                  const int16_t filter_val =
+                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+                  scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+                    input_val * static_cast<int64_t>(filter_val);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          const auto out_offset = calcOffset(output_shape, batch, out_y, out_x, out_c);
+
+          int64_t acc = scratch_data[out_offset];
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+
+          output_data[out_offset] = scaled_acc;
+        }
+      }
+    }
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+#include "kernels/Utils.h"
+
+#include "PALUnidirectionalSequenceLSTM.h"
+#include "PALApplyActivationToVector.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+#ifndef DIS_QUANT
+
+// Stores log2(x), rounded to the nearest integer, in `*log2_result`.
+// Returns true when log2(x) is within 1e-3 of that integer, false otherwise.
+// The logarithm is computed as std::log(x) / std::log(2) rather than with std::log2; the
+// TensorFlow code this derives from avoided std::log2 because of a toolchain limitation.
+bool checkedLog2(const float x, int *log2_result)
+{
+  const float exponent = std::log(x) * (1.0f / std::log(2.0f));
+  const float nearest_integer = std::round(exponent);
+
+  *log2_result = static_cast<int>(nearest_integer);
+
+  const float deviation = exponent - nearest_integer;
+  return std::abs(deviation) < 1e-3f;
+}
+
+// Create parameters for the element-wise multiplications that happen in
+// a) the cell state update and b) the hidden state update.
+// All gate outputs are symmetrically quantized, so only scales are needed for the inputs
+// (both input offsets are 0). During the hidden state update the output is the updated hidden
+// state, which may be asymmetrically quantized, hence `output_zp`.
+//
+// input1_scale, input2_scale: scales of the two multiplicands
+// output_scale:               scale of the product tensor
+// output_type:                S16 or S8 selects the clamping range; other types leave the
+//                             range at its zero-initialised value
+// output_zp:                  zero point of the product tensor
+lstm::ArithmeticParams createInterGateParams(const float input1_scale, const float input2_scale,
+                                             const float output_scale, const DataType output_type,
+                                             const int output_zp)
+{
+  // Value-initialise so that fields not assigned below hold a defined value.
+  lstm::ArithmeticParams op_params{};
+  if (output_type == DataType::S16)
+  {
+    op_params.quantized_activation_min = std::numeric_limits<int16_t>::min();
+    op_params.quantized_activation_max = std::numeric_limits<int16_t>::max();
+  }
+  else if (output_type == DataType::S8)
+  {
+    op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
+    op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
+  }
+
+  op_params.input1_offset = 0; // symmetric
+  op_params.input2_offset = 0; // symmetric
+  op_params.output_offset = output_zp;
+
+  const double input_product_scale =
+    static_cast<double>(input1_scale) * static_cast<double>(input2_scale);
+  const double effective_scale = input_product_scale / static_cast<double>(output_scale);
+
+  // quantizeMultiplier writes the shift through an int pointer, so use a local of that type
+  // as the out-parameter instead of seeding it from the not-yet-assigned struct field.
+  int output_shift = 0;
+  kernels::quantizeMultiplier(effective_scale, &op_params.output_multiplier, &output_shift);
+  op_params.output_shift = output_shift;
+  return op_params;
+}
+
+// Builds the quantized fully-connected parameters for one activation/weight pair of a gate:
+// the output multiplier/shift derived from the two tensor scales and
+// `nonlinear_activation_input_scale`, the activation range for `cell_type`, and the negated
+// zero points of both tensors as input/weights offsets. The output offset is 0.
+lstm::FullyConnectedParams createQuantizedGateFcParams(const circle::Tensor *activation,
+                                                       const circle::Tensor *weight,
+                                                       const float nonlinear_activation_input_scale,
+                                                       const DataType cell_type)
+{
+  lstm::FullyConnectedParams fc_params;
+
+  int output_shift;
+  int32_t output_multiplier;
+  const double real_multiplier = kernels::getQuantizedConvolutionMultipler(
+    Tensor::scale(activation), Tensor::scale(weight), nonlinear_activation_input_scale);
+  kernels::quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  kernels::calculateActivationRangeQuantized(FusedActFunc::NONE, 0,
+                                             nonlinear_activation_input_scale, cell_type,
+                                             &output_activation_min, &output_activation_max);
+
+  fc_params.output_shift = output_shift;
+  fc_params.output_multiplier = output_multiplier;
+  fc_params.quantized_activation_max = output_activation_max;
+  fc_params.quantized_activation_min = output_activation_min;
+  fc_params.input_offset = -Tensor::zero_point(activation);
+  fc_params.weights_offset = -Tensor::zero_point(weight);
+  fc_params.output_offset = 0;
+
+  return fc_params;
+}
+
+// Fills `gate_params` with the quantized fully-connected parameters of one LSTM gate: one set
+// for the input path (input x input_weight) and one for the recurrent path
+// (hidden_state x hidden_state_weight).
+// `input_bias` is accepted for interface compatibility but is not read here.
+void createGateParams(const circle::Tensor *input, const circle::Tensor *input_weight,
+                      const circle::Tensor *input_bias, const circle::Tensor *hidden_state,
+                      const circle::Tensor *hidden_state_weight,
+                      const float nonlinear_activation_input_scale, const DataType cell_type,
+                      lstm::GateParameters *gate_params)
+{
+  (void)input_bias;
+
+  // Input CalculateOpDataFullyConnected
+  gate_params->input_fc_params = createQuantizedGateFcParams(
+    input, input_weight, nonlinear_activation_input_scale, cell_type);
+
+  // Recurrent CalculateOpDataFullyConnected
+  gate_params->recurrent_fc_params = createQuantizedGateFcParams(
+    hidden_state, hidden_state_weight, nonlinear_activation_input_scale, cell_type);
+}
+
+// Fills `quant_lstm_params` for the integer LSTM: the fully-connected parameters of the
+// forget, input, cell and output gates, followed by the parameters of the three inter-gate
+// element-wise multiplications.
+void prepareGateParamsInteger(lstm::LSTMStruct *lstm_struct,
+                              lstm::LSTMParameters *quant_lstm_params)
+{
+  float nonlinear_input_scale = 0.00024414062; // 2^-12 Q3.12 -> Q0.15
+
+  // Every gate shares the same input and hidden (output) state tensors.
+  const auto *input = lstm_struct->input();
+  const auto *hidden_state = lstm_struct->output_state();
+
+  // Forget gate
+  createGateParams(input, lstm_struct->input_to_forget_weights(), lstm_struct->forget_gate_bias(),
+                   hidden_state, lstm_struct->recurrent_to_forget_weights(), nonlinear_input_scale,
+                   DataType::S16, &quant_lstm_params->forget_gate_parameters);
+
+  // Input gate
+  createGateParams(input, lstm_struct->input_to_input_weights(), lstm_struct->input_gate_bias(),
+                   hidden_state, lstm_struct->recurrent_to_input_weights(), nonlinear_input_scale,
+                   DataType::S16, &quant_lstm_params->input_gate_parameters);
+
+  // Cell gate
+  createGateParams(input, lstm_struct->input_to_cell_weights(), lstm_struct->cell_gate_bias(),
+                   hidden_state, lstm_struct->recurrent_to_cell_weights(), nonlinear_input_scale,
+                   DataType::S16, &quant_lstm_params->cell_gate_parameters);
+
+  // Output gate
+  createGateParams(input, lstm_struct->input_to_output_weights(), lstm_struct->output_gate_bias(),
+                   hidden_state, lstm_struct->recurrent_to_output_weights(), nonlinear_input_scale,
+                   DataType::S16, &quant_lstm_params->output_gate_parameters);
+
+  // Inter gate multiplication parameters
+  float nonlinear_output_scale = 0.00003051757; // 2^-15 Q3.12 -> Q0.15
+  float cell_state_scale = Tensor::scale(lstm_struct->cell_state());
+  auto &inter_gate = quant_lstm_params->inter_gate_parameters;
+
+  // forget gate output (nonlinear output) x cell state -> cell state
+  inter_gate.forget_cell_mul_params = createInterGateParams(
+    nonlinear_output_scale, cell_state_scale, cell_state_scale, DataType::S16, 0);
+
+  // input gate output x cell gate output -> cell state
+  inter_gate.input_mul_params = createInterGateParams(
+    nonlinear_output_scale, nonlinear_output_scale, cell_state_scale, DataType::S16, 0);
+
+  // tanh output x output gate output -> hidden state (potentially asymmetric)
+  inter_gate.output_mul_params = createInterGateParams(
+    nonlinear_output_scale, nonlinear_output_scale, Tensor::scale(hidden_state),
+    Tensor::element_type(hidden_state), Tensor::zero_point(hidden_state));
+}
+
+// Creates the additional information about the cell state:
+//   cell_state_scale_power: log2 of the cell state scale, rounded to the nearest integer
+//                           (used in integer nonlinear functions, e.g. tanh)
+//   cell_clip:              the clip value as passed in
+//   quantized_cell_clip:    cell_clip / cell_state_scale, limited to the int16 range
+lstm::CellStateInfo createLstmCellStateInfo(const float cell_state_scale, const float cell_clip)
+{
+  lstm::CellStateInfo info;
+
+  // Whether the scale is an exact power of two (checkedLog2's return value) is not checked.
+  int scale_power;
+  checkedLog2(cell_state_scale, &scale_power);
+  info.cell_state_scale_power = scale_power;
+
+  // Cell state specifics
+  info.cell_clip = cell_clip;
+  const double unclamped_clip =
+    static_cast<double>(cell_clip) / static_cast<double>(cell_state_scale);
+  const double clamped_clip = std::min(std::max(unclamped_clip, -32768.0), 32767.0);
+  info.quantized_cell_clip = static_cast<int16_t>(clamped_clip);
+
+  return info;
+}
+
+// Integer evaluation: prepares the quantized gate parameters and cell state info, allocates
+// four scratch buffers plus zero-filled output-state (int8) and cell-state (int16) buffers,
+// and runs the PAL LSTM implementation. The trailing bool argument is unused.
+void evalInt8(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph, bool)
+{
+  lstm::LSTMStruct lstm_struct(cur_op, runtime_graph);
+
+  lstm::LSTMParameters quant_lstm_params;
+  prepareGateParamsInteger(&lstm_struct, &quant_lstm_params);
+
+  lstm::CellStateInfo cell_state_info = createLstmCellStateInfo(
+    luci_interpreter::Tensor::scale(lstm_struct.cell_state()), lstm_struct.options->cell_clip());
+
+  // The batch dimension of the input is 1 for time-major layout and 0 otherwise.
+  const bool time_major = lstm_struct.options->time_major();
+  const auto batch_size =
+    time_major ? Tensor::dim(lstm_struct.input(), 1) : Tensor::dim(lstm_struct.input(), 0);
+  const auto state_dimension = Tensor::dim(lstm_struct.output_state(), 1);
+  const auto cell_state_type_size = getDataTypeSize(Tensor::element_type(lstm_struct.cell_state()));
+
+  // Each scratch buffer holds batch_size * state_dimension elements of the cell state type.
+  const auto scratch_bytes = batch_size * state_dimension * cell_state_type_size;
+  auto scratch_0_data = std::make_unique<uint8_t[]>(scratch_bytes);
+  auto scratch_1_data = std::make_unique<uint8_t[]>(scratch_bytes);
+  auto scratch_2_data = std::make_unique<uint8_t[]>(scratch_bytes);
+  auto scratch_3_data = std::make_unique<uint8_t[]>(scratch_bytes);
+
+  // Create and fill with 0 output state tensor
+  const auto output_state_size = Tensor::num_elements(lstm_struct.output_state());
+  auto output_state_data = std::make_unique<int8_t[]>(output_state_size);
+  std::fill_n(output_state_data.get(), output_state_size, 0);
+
+  // Create and fill with 0 cell state tensor
+  const auto cell_state_size = Tensor::num_elements(lstm_struct.cell_state());
+  auto cell_state_data = std::make_unique<int16_t[]>(cell_state_size);
+  std::fill_n(cell_state_data.get(), cell_state_size, 0);
+
+  luci_interpreter_pal::evalLSTM<int8_t, int8_t, int16_t, int32_t>(
+    &lstm_struct, &quant_lstm_params, &cell_state_info, output_state_data.get(),
+    cell_state_data.get(), kernels::getTensorData<int16_t>(scratch_0_data.get()),
+    kernels::getTensorData<int16_t>(scratch_1_data.get()),
+    kernels::getTensorData<int16_t>(scratch_2_data.get()),
+    kernels::getTensorData<int16_t>(scratch_3_data.get()), runtime_graph);
+}
+
+#endif // DIS_QUANT
+
+#ifndef DIS_FLOAT
+// Returns fully-connected parameters whose float activation range is the one computed for
+// FusedActFunc::NONE.
+lstm::FullyConnectedParams createFcParamsFloat()
+{
+  lstm::FullyConnectedParams fc_params;
+  float *range_min = &fc_params.float_activation_min;
+  float *range_max = &fc_params.float_activation_max;
+  kernels::calculateActivationRange(FusedActFunc::NONE, range_min, range_max);
+  return fc_params;
+}
+
+// Returns float gate parameters: the input and the recurrent fully-connected parts both use
+// the parameters produced by createFcParamsFloat().
+lstm::GateParameters createGateParamsFloat()
+{
+  lstm::GateParameters float_gate;
+  float_gate.input_fc_params = createFcParamsFloat();
+  float_gate.recurrent_fc_params = createFcParamsFloat();
+  return float_gate;
+}
+
+// Returns the cell state info for the float path: only `cell_clip` carries information, the
+// quantization-related fields are set to 0.
+lstm::CellStateInfo createLstmCellStateInfoFloat(const float cell_clip)
+{
+  lstm::CellStateInfo info;
+  // No quantization on the float path.
+  info.quantized_cell_clip = 0;
+  info.cell_state_scale_power = 0;
+  info.cell_clip = cell_clip;
+  return info;
+}
+
+// Fills `float_lstm_params` for the float LSTM: identical float gate parameters for the four
+// gates and one shared set of arithmetic parameters for the three inter-gate multiplications.
+void prepareGateParamsFloat(lstm::LSTMParameters *float_lstm_params)
+{
+  // Gate Parameters
+  float_lstm_params->forget_gate_parameters = createGateParamsFloat();
+  float_lstm_params->input_gate_parameters = createGateParamsFloat();
+  float_lstm_params->cell_gate_parameters = createGateParamsFloat();
+  float_lstm_params->output_gate_parameters = createGateParamsFloat();
+
+  // Inter gate multiplication parameters: only the float activation range is filled in.
+  lstm::ArithmeticParams mul_params;
+  kernels::calculateActivationRange(FusedActFunc::NONE, &mul_params.float_activation_min,
+                                    &mul_params.float_activation_max);
+
+  auto &inter_gate = float_lstm_params->inter_gate_parameters;
+  inter_gate.forget_cell_mul_params = mul_params;
+  inter_gate.input_mul_params = mul_params;
+  inter_gate.output_mul_params = mul_params;
+}
+
+// Float evaluation: prepares the float gate parameters and cell state info, allocates four
+// scratch buffers plus zero-filled output-state and cell-state buffers, and runs the PAL LSTM
+// implementation. The trailing bool argument is unused.
+void evalFloat(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph, bool)
+{
+  lstm::LSTMStruct lstm_struct(cur_op, runtime_graph);
+
+  lstm::CellStateInfo cell_state_info =
+    createLstmCellStateInfoFloat(lstm_struct.options->cell_clip());
+
+  lstm::LSTMParameters lstm_params;
+  prepareGateParamsFloat(&lstm_params);
+
+  // The batch dimension of the input is 1 for time-major layout and 0 otherwise.
+  const bool time_major = lstm_struct.options->time_major();
+  const auto batch_size =
+    time_major ? Tensor::dim(lstm_struct.input(), 1) : Tensor::dim(lstm_struct.input(), 0);
+  const auto state_dimension = Tensor::dim(lstm_struct.output_state(), 1);
+  const auto cell_state_type_size = getDataTypeSize(Tensor::element_type(lstm_struct.cell_state()));
+
+  // Each scratch buffer holds batch_size * state_dimension elements of the cell state type.
+  const auto scratch_bytes = batch_size * state_dimension * cell_state_type_size;
+  auto scratch_0_data = std::make_unique<uint8_t[]>(scratch_bytes);
+  auto scratch_1_data = std::make_unique<uint8_t[]>(scratch_bytes);
+  auto scratch_2_data = std::make_unique<uint8_t[]>(scratch_bytes);
+  auto scratch_3_data = std::make_unique<uint8_t[]>(scratch_bytes);
+
+  // Create and fill with 0 output state tensor
+  const auto output_state_size = Tensor::num_elements(lstm_struct.output_state());
+  auto output_state_data = std::make_unique<float[]>(output_state_size);
+  std::fill_n(output_state_data.get(), output_state_size, 0);
+
+  // Create and fill with 0 cell state tensor
+  const auto cell_state_size = Tensor::num_elements(lstm_struct.cell_state());
+  auto cell_state_data = std::make_unique<float[]>(cell_state_size);
+  std::fill_n(cell_state_data.get(), cell_state_size, 0);
+
+  luci_interpreter_pal::evalLSTM<float, float, float, float>(
+    &lstm_struct, &lstm_params, &cell_state_info, output_state_data.get(), cell_state_data.get(),
+    kernels::getTensorData<float>(scratch_0_data.get()),
+    kernels::getTensorData<float>(scratch_1_data.get()),
+    kernels::getTensorData<float>(scratch_2_data.get()),
+    kernels::getTensorData<float>(scratch_3_data.get()), runtime_graph);
+}
+#endif // DIS_FLOAT
+
+// Checks that `weight_tensor` is a 2-D tensor of shape [dim1_size, dim2_size].
+void validateWeightTensorSize(const circle::Tensor *weight_tensor, int dim1_size, int dim2_size)
+{
+  const auto rank = Tensor::num_dims(weight_tensor);
+  LUCI_INTERPRETER_CHECK(rank == 2);
+
+  const auto rows = Tensor::dim(weight_tensor, 0);
+  LUCI_INTERPRETER_CHECK(rows == dim1_size);
+
+  const auto cols = Tensor::dim(weight_tensor, 1);
+  LUCI_INTERPRETER_CHECK(cols == dim2_size);
+}
+
+// Checks the shapes of the LSTM weight, bias, state and output tensors against the batch,
+// input and state dimensions derived from the input and output-state tensors.
+void validateTensorsSize(lstm::LSTMStruct *lstm_struct, const bool time_major)
+{
+  const auto *input = lstm_struct->input();
+  const auto *output = lstm_struct->output();
+
+  // The batch dimension of the input is 1 for time-major layout and 0 otherwise.
+  const auto batch_size = time_major ? Tensor::dim(input, 1) : Tensor::dim(input, 0);
+  const auto input_dimension = Tensor::dim(input, 2);
+  const auto state_dimension = Tensor::dim(lstm_struct->output_state(), 1);
+
+  // Input FC weights (internal tensors 1..4): [state_dimension, input_dimension]
+  for (int32_t idx = 1; idx < 5; ++idx)
+  {
+    validateWeightTensorSize(lstm_struct->get_internal_tensor(idx), state_dimension,
+                             input_dimension);
+  }
+
+  // Recurrent FC weights (internal tensors 5..8): [state_dimension, state_dimension]
+  for (int32_t idx = 5; idx < 9; ++idx)
+  {
+    validateWeightTensorSize(lstm_struct->get_internal_tensor(idx), state_dimension,
+                             state_dimension);
+  }
+
+  // Biases (internal tensors 12..15): 1-D with state_dimension elements
+  for (int32_t idx = 12; idx < 16; ++idx)
+  {
+    LUCI_INTERPRETER_CHECK(Tensor::num_dims(lstm_struct->get_internal_tensor(idx)) == 1);
+    LUCI_INTERPRETER_CHECK(Tensor::dim(lstm_struct->get_internal_tensor(idx), 0) ==
+                           state_dimension);
+  }
+
+  // Check the shape of input state tensors.
+  // These tensors may be 1D or 2D. It's fine as long as the total size is correct.
+  LUCI_INTERPRETER_CHECK(Tensor::num_elements(lstm_struct->output_state()) ==
+                         batch_size * state_dimension);
+  LUCI_INTERPRETER_CHECK(Tensor::num_elements(lstm_struct->cell_state()) ==
+                         batch_size * state_dimension);
+
+  // Check the shape of output tensor against that of input tensor
+  LUCI_INTERPRETER_CHECK(Tensor::num_dims(output) == 3);
+  LUCI_INTERPRETER_CHECK(Tensor::dim(input, 0) == Tensor::dim(output, 0));
+  LUCI_INTERPRETER_CHECK(Tensor::dim(input, 1) == Tensor::dim(output, 1));
+  LUCI_INTERPRETER_CHECK(Tensor::dim(output, 2) == state_dimension);
+}
+
+} // namespace
+
+// Validates a UnidirectionalSequenceLSTM operator: the input must be FLOAT32 or S8, tensor
+// types and sizes must be consistent, and the peephole, projection and layer-norm tensors
+// must be absent.
+void configure_kernel_CircleUnidirectionalSequenceLSTM(const circle::Operator *cur_op,
+                                                       BaseRuntimeGraph *runtime_graph)
+{
+  lstm::LSTMStruct lstm_struct(cur_op, runtime_graph);
+
+  const auto input_type = Tensor::element_type(lstm_struct.input());
+  LUCI_INTERPRETER_CHECK(input_type == DataType::FLOAT32 || input_type == DataType::S8);
+
+  lstm_struct.validateTensorTypes();
+
+  const bool time_major = lstm_struct.options->time_major();
+
+  validateTensorsSize(&lstm_struct, time_major);
+
+  // Requires the internal tensors with indices in [first, last) to be null.
+  const auto require_absent = [&lstm_struct](int32_t first, int32_t last) {
+    for (int32_t idx = first; idx < last; ++idx)
+    {
+      LUCI_INTERPRETER_CHECK(lstm_struct.get_internal_tensor(idx) == nullptr);
+    }
+  };
+
+  // No peephole
+  require_absent(9, 12);
+
+  // No projection
+  require_absent(16, 18);
+
+  // No internal layer norm
+  require_absent(20, 24);
+}
+
+// Executes a UnidirectionalSequenceLSTM operator by dispatching on the element type of its
+// first input: FLOAT32 -> evalFloat (unless DIS_FLOAT is defined), S8 -> evalInt8 (unless
+// DIS_QUANT is defined).
+void execute_kernel_CircleUnidirectionalSequenceLSTM(const circle::Operator *cur_op,
+                                                     BaseRuntimeGraph *runtime_graph, bool in_place)
+{
+  // Operand 0 is the LSTM input; it must be present.
+  const auto first_input_index = cur_op->inputs()->operator[](0);
+  assert(first_input_index != -1);
+
+  const auto first_input = runtime_graph->getCircleTensorByIndex(first_input_index);
+  const auto input_type = Tensor::element_type(first_input);
+
+  switch (input_type)
+  {
+#ifndef DIS_FLOAT
+    case DataType::FLOAT32:
+    {
+      evalFloat(cur_op, runtime_graph, in_place);
+      break;
+    }
+#endif // DIS_FLOAT
+#ifndef DIS_QUANT
+    case DataType::S8:
+    {
+      evalInt8(cur_op, runtime_graph, in_place);
+      break;
+    }
+#endif // DIS_QUANT
+    default:
+      assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+#define LUCI_INTERPRETER_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H
+
+#include "Utils.h"
+
+namespace luci_interpreter
+{
+namespace lstm
+{
+
+// Read-only view over the tensors of one UnidirectionalSequenceLSTM operator.
+//
+// The constructor resolves the operator's 24 input tensor indices and its first
+// output index through the runtime graph and caches the resulting
+// circle::Tensor pointers. Optional inputs are stored as whatever
+// getCircleTensorByIndex returns for their index; the validation code in this
+// file treats a null pointer as "tensor not present".
+//
+// Input slot layout (slot number == operator input position):
+//    0      input
+//    1..4   input-to-{input, forget, cell, output} weights
+//    5..8   recurrent-to-{input, forget, cell, output} weights
+//    9..11  cell-to-{input, forget, output} weights
+//   12..15  {input, forget, cell, output} gate bias
+//   16..17  projection weights, projection bias
+//   18..19  output state, cell state
+//   20..23  {input, forget, cell, output} layer norm coefficients
+struct LSTMStruct
+{
+  LSTMStruct() = delete;
+  LSTMStruct(const LSTMStruct &) = delete;
+
+  // cur_op        - operator to wrap; must expose at least 24 inputs and 1 output.
+  // runtime_graph - graph used to translate tensor indices into circle::Tensor pointers.
+  explicit LSTMStruct(const circle::Operator *cur_op,
+                      luci_interpreter::BaseRuntimeGraph *runtime_graph)
+  {
+    // Every slot below is read unconditionally, so reject operators that do not
+    // provide all of them instead of reading past the end of the index vector.
+    const auto *op_inputs = cur_op->inputs();
+    LUCI_INTERPRETER_CHECK(op_inputs != nullptr and op_inputs->size() >= kNumInputs);
+
+    for (uint32_t slot = 0; slot < kNumInputs; ++slot)
+    {
+      const auto tensor_index = op_inputs->operator[](slot);
+      // Only mandatory slots must reference a tensor; optional ones may be -1.
+      assert(not isMandatoryInput(slot) or tensor_index != -1);
+      internal_tensors[slot] = runtime_graph->getCircleTensorByIndex(tensor_index);
+    }
+
+    const auto *op_outputs = cur_op->outputs();
+    LUCI_INTERPRETER_CHECK(op_outputs != nullptr and op_outputs->size() > 0);
+    const auto output_index = op_outputs->operator[](0);
+    assert(output_index != -1);
+    output_internal = runtime_graph->getCircleTensorByIndex(output_index);
+
+    options = cur_op->builtin_options_as_UnidirectionalSequenceLSTMOptions();
+  }
+
+  // Checks element-type consistency:
+  //  - input, output_state and output share one element type,
+  //  - every present weight tensor (slots 1..8) matches input_to_forget_weights,
+  //  - every present bias tensor (slots 12..15) matches forget_gate_bias.
+  void validateTensorTypes() const
+  {
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(input()) == Tensor::element_type(output_state()));
+    LUCI_INTERPRETER_CHECK(Tensor::element_type(output()) == Tensor::element_type(input()));
+
+    for (int32_t i = 1; i < 9; ++i)
+    {
+      LUCI_INTERPRETER_CHECK(internal_tensors[i] == nullptr or
+                             Tensor::element_type(input_to_forget_weights()) ==
+                               Tensor::element_type(internal_tensors[i]));
+    }
+
+    for (int32_t i = 12; i < 16; ++i)
+    {
+      LUCI_INTERPRETER_CHECK(internal_tensors[i] == nullptr or
+                             Tensor::element_type(forget_gate_bias()) ==
+                               Tensor::element_type(internal_tensors[i]));
+    }
+  }
+
+  const circle::Tensor *input() const { return internal_tensors[0]; }
+
+  const circle::Tensor *input_to_input_weights() const { return internal_tensors[1]; }
+  const circle::Tensor *input_to_forget_weights() const { return internal_tensors[2]; }
+  const circle::Tensor *input_to_cell_weights() const { return internal_tensors[3]; }
+  const circle::Tensor *input_to_output_weights() const { return internal_tensors[4]; }
+
+  const circle::Tensor *recurrent_to_input_weights() const { return internal_tensors[5]; }
+  const circle::Tensor *recurrent_to_forget_weights() const { return internal_tensors[6]; }
+  const circle::Tensor *recurrent_to_cell_weights() const { return internal_tensors[7]; }
+  const circle::Tensor *recurrent_to_output_weights() const { return internal_tensors[8]; }
+
+  const circle::Tensor *cell_to_input_weights() const { return internal_tensors[9]; }
+  const circle::Tensor *cell_to_forget_weights() const { return internal_tensors[10]; }
+  const circle::Tensor *cell_to_output_weights() const { return internal_tensors[11]; }
+
+  const circle::Tensor *input_gate_bias() const { return internal_tensors[12]; }
+  const circle::Tensor *forget_gate_bias() const { return internal_tensors[13]; }
+  const circle::Tensor *cell_gate_bias() const { return internal_tensors[14]; }
+  const circle::Tensor *output_gate_bias() const { return internal_tensors[15]; }
+
+  const circle::Tensor *projection_weights() const { return internal_tensors[16]; }
+  const circle::Tensor *projection_bias() const { return internal_tensors[17]; }
+
+  const circle::Tensor *output_state() const { return internal_tensors[18]; }
+  const circle::Tensor *cell_state() const { return internal_tensors[19]; }
+
+  const circle::Tensor *input_layer_norm_coefficients() const { return internal_tensors[20]; }
+  const circle::Tensor *forget_layer_norm_coefficients() const { return internal_tensors[21]; }
+  const circle::Tensor *cell_layer_norm_coefficients() const { return internal_tensors[22]; }
+  const circle::Tensor *output_layer_norm_coefficients() const { return internal_tensors[23]; }
+  const circle::Tensor *output() const { return output_internal; }
+
+  // Builtin options of the wrapped operator, as returned by
+  // builtin_options_as_UnidirectionalSequenceLSTMOptions().
+  const circle::UnidirectionalSequenceLSTMOptions *options;
+
+  // Returns the cached tensor pointer for input slot i; i must be in [0, 24).
+  const circle::Tensor *get_internal_tensor(int i) const
+  {
+    assert(i >= 0 and static_cast<uint32_t>(i) < kNumInputs);
+    return internal_tensors[i];
+  }
+
+private:
+  // Number of operator inputs cached in internal_tensors.
+  static constexpr uint32_t kNumInputs = 24;
+
+  // True for the input slots that must reference a tensor (index != -1).
+  static bool isMandatoryInput(uint32_t slot)
+  {
+    switch (slot)
+    {
+      case 0:  // input
+      case 2:  // input_to_forget_weights
+      case 3:  // input_to_cell_weights
+      case 4:  // input_to_output_weights
+      case 6:  // recurrent_to_forget_weights
+      case 7:  // recurrent_to_cell_weights
+      case 8:  // recurrent_to_output_weights
+      case 13: // forget_gate_bias
+      case 14: // cell_gate_bias
+      case 15: // output_gate_bias
+      case 18: // output_state
+      case 19: // cell_state
+        return true;
+      default:
+        return false;
+    }
+  }
+
+  const circle::Tensor *output_internal;
+  const circle::Tensor *internal_tensors[kNumInputs];
+};
+
+// Plain parameter bundle; judging by the field names it configures one
+// fully-connected computation (offsets, output rescaling and activation
+// clamping bounds). How each field is consumed is not visible in this header.
+struct FullyConnectedParams
+{
+ int32_t input_offset;
+ int32_t weights_offset;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int32_t output_shift;
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ // NOTE(review): the two "float" bounds below are declared as int32_t, so they
+ // cannot hold fractional or out-of-int32-range values - confirm against the
+ // code that fills and reads them before changing the type.
+ int32_t float_activation_min;
+ int32_t float_activation_max;
+};
+
+// Pair of FullyConnectedParams for a single gate: one set named for the input
+// path and one for the recurrent path.
+struct GateParameters
+{
+ FullyConnectedParams input_fc_params;
+ FullyConnectedParams recurrent_fc_params;
+};
+
+// Plain parameter bundle for a two-input operation (two input offsets, output
+// rescaling and activation clamping bounds). InterGateParameters uses it for
+// its *_mul_params members.
+struct ArithmeticParams
+{
+ int32_t input1_offset;
+ int32_t input2_offset;
+ int32_t quantized_activation_min;
+ int32_t quantized_activation_max;
+ int32_t output_offset;
+ int32_t output_multiplier;
+ int32_t output_shift;
+ // NOTE(review): the two "float" bounds below are declared as int32_t, so they
+ // cannot hold fractional or out-of-int32-range values - confirm against the
+ // code that fills and reads them before changing the type.
+ int32_t float_activation_min;
+ int32_t float_activation_max;
+};
+
+// Three ArithmeticParams sets, one per multiplication named by the members:
+// forget/cell, input and output.
+struct InterGateParameters
+{
+ ArithmeticParams forget_cell_mul_params;
+ ArithmeticParams input_mul_params;
+ ArithmeticParams output_mul_params;
+};
+
+// Clipping and scaling information for the LSTM cell state.
+struct CellStateInfo
+{
+ // Clipping bound for the cell state as a float value.
+ float cell_clip;
+ // Clipping range for the quantized cell state. Only a 16-bit cell state is
+ // supported (this could be generalized by making the type a template
+ // parameter).
+ int16_t quantized_cell_clip;
+ // 2^-cell_state_scale_power = cell state scale; required by the integer tanh
+ // computation.
+ int32_t cell_state_scale_power;
+};
+
+// Aggregate of all parameter sets declared in this header: one GateParameters
+// per gate (forget, input, cell, output) plus the InterGateParameters.
+struct LSTMParameters
+{
+ GateParameters forget_gate_parameters;
+ GateParameters input_gate_parameters;
+ GateParameters cell_gate_parameters;
+ GateParameters output_gate_parameters;
+ InterGateParameters inter_gate_parameters;
+};
+
+} // namespace lstm
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UNIDIRECTIONAL_SEQUENCE_LSTM_H
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture shared by the UnidirectionalSequenceLSTM kernel tests. It owns the
+// memory manager that the tests pass to makeInputTensor and use to allocate
+// output tensors.
+class UnidirectionalSequenceLSTMTest : public ::testing::Test
+{
+protected:
+  std::unique_ptr<IMemoryManager> _memory_manager;
+
+  // Gives every test a freshly constructed TestMemoryManager.
+  void SetUp() override
+  {
+    _memory_manager = std::make_unique<TestMemoryManager>();
+  }
+};
+
+// NOTE from NoCifgNoPeepholeNoProjectionNoClippingUnidirectionalLstmTest
+//
+// Float LSTM with all four gates and no peephole, projection or layer-norm
+// tensors (those kernel arguments are nullptr). The input is laid out
+// time-major, {sequence_length, n_batch, n_input}, and the output is expected
+// as {sequence_length, n_batch, n_output}.
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+
+  std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284,  0.11810488, 0.2013163,
+                                              -0.20583314, 0.44344562, 0.22077113, -0.29909778};
+
+  std::vector<float> input_to_forget_weights = {0.09701663,  0.20334584, -0.50592935, -0.31343272,
+                                                -0.40032279, 0.44781327, 0.01387155,  -0.35593212};
+
+  std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138,
+                                                0.44272184,  0.03897077,  -0.1556896, 0.19487578};
+
+  std::vector<float> input_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> forget_gate_bias = {1., 1., 1., 1.};
+  std::vector<float> cell_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> output_gate_bias = {0., 0., 0., 0.};
+
+  std::vector<float> recurrent_to_input_weights = {
+    -0.0063535,  -0.2042388,  0.31454784,  0.03034258, -0.35746509, 0.28902304, 0.08183324,
+    -0.16555229, 0.02286911,  -0.13566875, 0.48091322, -0.12528998, 0.24077177, -0.51332325,
+    -0.33502164, 0.10629296};
+
+  std::vector<float> recurrent_to_forget_weights = {
+    -0.48684245, -0.06655136, 0.42224967,  0.2112639,   0.27654213, 0.20864892,
+    -0.07646349, 0.45877004,  0.00141793,  -0.14609534, 0.36447752, 0.09196436,
+    0.28053468,  0.01560611,  -0.20127171, -0.01140004};
+
+  std::vector<float> recurrent_to_cell_weights = {
+    -0.3407414,  0.24443203,  -0.2078532,  0.26320225,  0.05695659, -0.00123841,
+    -0.4744786,  -0.35869038, -0.06418842, -0.13502428, -0.501764,  0.22830659,
+    -0.46367589, 0.26016325,  -0.03894562, -0.16368064};
+
+  std::vector<float> recurrent_to_output_weights = {
+    0.43385774,  -0.17194885, 0.2718237,  0.09215671,  0.24107647, -0.39835793,
+    0.18212086,  0.01301402,  0.48572797, -0.50656658, 0.20047462, -0.20607421,
+    -0.51818722, -0.15390486, 0.0468148,  0.39922136};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{sequence_length, n_batch, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = true;
+  params.asymmetric_quantize_inputs = false;
+
+  // nullptr arguments are the absent peephole, projection and layer-norm tensors.
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  _memory_manager->allocate_memory(scratchpad_2);
+  _memory_manager->allocate_memory(scratchpad_3);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{-0.02973187, 0.1229473,  0.20885126, -0.15358765,
+                                     -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+                                     -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+
+  // Dimensions are integers, so keep the reference shape in an integer vector.
+  std::vector<int32_t> ref_output_shape{sequence_length, n_batch, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Same weights, input data and reference output as FloatTest, but with
+// params.time_major == false: the input is {n_batch, sequence_length, n_input}
+// and the output is expected as {n_batch, sequence_length, n_output}.
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_batch)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 2;
+  const int32_t n_cell = 4;
+  const int32_t n_output = 4;
+  const int32_t sequence_length = 3;
+
+  std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+                                               0.04266912,  -0.15680569, -0.34856534, 0.43890524};
+
+  std::vector<float> input_to_cell_weights = {-0.50013041, 0.1370284,  0.11810488, 0.2013163,
+                                              -0.20583314, 0.44344562, 0.22077113, -0.29909778};
+
+  std::vector<float> input_to_forget_weights = {0.09701663,  0.20334584, -0.50592935, -0.31343272,
+                                                -0.40032279, 0.44781327, 0.01387155,  -0.35593212};
+
+  std::vector<float> input_to_output_weights = {-0.25065863, -0.28290087, 0.04613829, 0.40525138,
+                                                0.44272184,  0.03897077,  -0.1556896, 0.19487578};
+
+  std::vector<float> input_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> forget_gate_bias = {1., 1., 1., 1.};
+  std::vector<float> cell_gate_bias = {0., 0., 0., 0.};
+  std::vector<float> output_gate_bias = {0., 0., 0., 0.};
+
+  std::vector<float> recurrent_to_input_weights = {
+    -0.0063535,  -0.2042388,  0.31454784,  -0.35746509, 0.28902304, 0.08183324,
+    -0.16555229, 0.02286911,  -0.13566875, 0.03034258,  0.48091322, -0.12528998,
+    0.24077177,  -0.51332325, -0.33502164, 0.10629296};
+
+  std::vector<float> recurrent_to_forget_weights = {
+    -0.48684245, -0.06655136, 0.42224967,  0.2112639,   0.27654213, 0.20864892,
+    -0.07646349, 0.45877004,  0.00141793,  -0.14609534, 0.36447752, 0.09196436,
+    0.28053468,  0.01560611,  -0.20127171, -0.01140004};
+
+  std::vector<float> recurrent_to_cell_weights = {
+    -0.3407414,  0.24443203,  -0.2078532,  0.26320225,  0.05695659, -0.00123841,
+    -0.4744786,  -0.35869038, -0.06418842, -0.13502428, -0.501764,  0.22830659,
+    -0.46367589, 0.26016325,  -0.03894562, -0.16368064};
+
+  std::vector<float> recurrent_to_output_weights = {
+    0.43385774,  -0.17194885, 0.2718237,  0.09215671,  0.24107647, -0.39835793,
+    0.18212086,  0.01301402,  0.48572797, -0.50656658, 0.20047462, -0.20607421,
+    -0.51818722, -0.15390486, 0.0468148,  0.39922136};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+  Shape input_shape{n_batch, sequence_length, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 0.0;
+  params.proj_clip = 0.0;
+  params.time_major = false;
+  params.asymmetric_quantize_inputs = false;
+
+  // NOTE(review): the three arguments after &output_tensor are
+  // &output_state_tensor, &cell_state_tensor and &scratchpad_1, whereas
+  // FloatTest passes three dedicated scratchpad tensors in those positions.
+  // Confirm that sharing the state tensors with those slots is intended.
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, &cell_state_tensor,
+    &scratchpad_1, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{-0.02973187, 0.1229473,  0.20885126, -0.15358765,
+                                     -0.03716109, 0.12507336, 0.41193449, -0.20860538,
+                                     -0.15053082, 0.09120187, 0.24278517, -0.12222792};
+
+  // Dimensions are integers, so keep the reference shape in an integer vector.
+  std::vector<int32_t> ref_output_shape{n_batch, sequence_length, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Smallest configuration: one batch, one time step, one input feature and one
+// cell, with params.time_major == false and cell_clip set to 10.0.
+TEST_F(UnidirectionalSequenceLSTMTest, FloatTest_simple)
+{
+  const int32_t n_batch = 1;
+  const int32_t n_input = 1;
+  const int32_t n_cell = 1;
+  const int32_t n_output = 1;
+  const int32_t sequence_length = 1;
+
+  std::vector<float> input_to_input_weights = {0.329067};
+  std::vector<float> input_to_forget_weights = {0.308059};
+  std::vector<float> input_to_cell_weights = {0.152916};
+  std::vector<float> input_to_output_weights = {-0.476033};
+
+  std::vector<float> input_gate_bias = {0.};
+  std::vector<float> forget_gate_bias = {1.};
+  std::vector<float> cell_gate_bias = {0.};
+  std::vector<float> output_gate_bias = {0.};
+
+  std::vector<float> recurrent_to_input_weights = {0.207806};
+  std::vector<float> recurrent_to_forget_weights = {0.028718};
+  std::vector<float> recurrent_to_cell_weights = {-0.182756};
+  std::vector<float> recurrent_to_output_weights = {-0.960517};
+
+  Shape input_to_input_weights_shape{n_cell, n_input};
+  Shape input_to_cell_weights_shape{n_cell, n_input};
+  Shape input_to_forget_weights_shape{n_cell, n_input};
+  Shape input_to_output_weights_shape{n_cell, n_input};
+
+  Shape input_gate_bias_shape{n_cell};
+  Shape forget_gate_bias_shape{n_cell};
+  Shape cell_gate_bias_shape{n_cell};
+  Shape output_gate_bias_shape{n_cell};
+
+  Shape recurrent_to_input_weights_shape{n_cell, n_output};
+  Shape recurrent_to_cell_weights_shape{n_cell, n_output};
+  Shape recurrent_to_forget_weights_shape{n_cell, n_output};
+  Shape recurrent_to_output_weights_shape{n_cell, n_output};
+
+  Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+  Tensor input_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_cell_weights_shape, input_to_cell_weights, _memory_manager.get());
+  Tensor input_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_forget_weights_shape, input_to_forget_weights, _memory_manager.get());
+  Tensor input_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_to_output_weights_shape, input_to_output_weights, _memory_manager.get());
+
+  Tensor input_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    input_gate_bias_shape, input_gate_bias, _memory_manager.get());
+  Tensor forget_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    forget_gate_bias_shape, forget_gate_bias, _memory_manager.get());
+  Tensor cell_gate_bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(cell_gate_bias_shape, cell_gate_bias, _memory_manager.get());
+  Tensor output_gate_bias_tensor = makeInputTensor<DataType::FLOAT32>(
+    output_gate_bias_shape, output_gate_bias, _memory_manager.get());
+
+  Tensor recurrent_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_input_weights_shape, recurrent_to_input_weights, _memory_manager.get());
+  Tensor recurrent_to_cell_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_cell_weights_shape, recurrent_to_cell_weights, _memory_manager.get());
+  Tensor recurrent_to_forget_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_forget_weights_shape, recurrent_to_forget_weights, _memory_manager.get());
+  Tensor recurrent_to_output_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+    recurrent_to_output_weights_shape, recurrent_to_output_weights, _memory_manager.get());
+
+  std::vector<float> input_data{0.03653763};
+  Shape input_shape{n_batch, sequence_length, n_input};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  Shape output_state_shape{n_batch, n_output};
+  Tensor output_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  output_state_tensor.resize(output_state_shape);
+
+  Shape cell_state_shape{n_batch, n_cell};
+  Tensor cell_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  cell_state_tensor.resize(cell_state_shape);
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = Activation::TANH;
+  params.cell_clip = 10.0;
+  params.proj_clip = 0.0;
+  params.time_major = false;
+  params.asymmetric_quantize_inputs = false;
+
+  // NOTE(review): the three arguments after &output_tensor are
+  // &output_state_tensor, &cell_state_tensor and &scratchpad_1, whereas
+  // FloatTest passes three dedicated scratchpad tensors in those positions.
+  // Confirm that sharing the state tensors with those slots is intended.
+  UnidirectionalSequenceLSTM kernel(
+    &input_tensor, &input_to_input_weights_tensor, &input_to_forget_weights_tensor,
+    &input_to_cell_weights_tensor, &input_to_output_weights_tensor,
+    &recurrent_to_input_weights_tensor, &recurrent_to_forget_weights_tensor,
+    &recurrent_to_cell_weights_tensor, &recurrent_to_output_weights_tensor, nullptr, nullptr,
+    nullptr, &input_gate_bias_tensor, &forget_gate_bias_tensor, &cell_gate_bias_tensor,
+    &output_gate_bias_tensor, nullptr, nullptr, &output_state_tensor, &cell_state_tensor, nullptr,
+    nullptr, nullptr, nullptr, &output_tensor, &output_state_tensor, &cell_state_tensor,
+    &scratchpad_1, params);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(output_state_tensor);
+  _memory_manager->allocate_memory(cell_state_tensor);
+  _memory_manager->allocate_memory(scratchpad_1);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0.00139296};
+  // Dimensions are integers, so keep the reference shape in an integer vector.
+  std::vector<int32_t> ref_output_shape{n_batch, sequence_length, n_output};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Negative test: the input tensor is created with DataType::S8 and kernel.configure()
+// is expected to throw.
+TEST_F(UnidirectionalSequenceLSTMTest, Unsupported_Type_Configure_NEG)
+{
+ const int32_t n_batch = 1;
+ const int32_t n_input = 2;
+ const int32_t n_cell = 4;
+ const int32_t n_output = 4;
+ const int32_t sequence_length = 3;
+
+ std::vector<int8_t> input_data{2, 3, 3, 4, 1, 1}; // int8 is not supported as of now
+ Shape input_shape{sequence_length, n_batch, n_input};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_data, _memory_manager.get());
+
+ // A single FLOAT32 {n_cell, n_input} tensor; it is reused below for every non-null
+ // kernel argument other than the input, the output and the scratchpads.
+ std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+ 0.04266912, -0.15680569, -0.34856534, 0.43890524};
+ Shape input_to_input_weights_shape{n_cell, n_input};
+ Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+ UnidirectionalSequenceLSTMParams params{};
+ params.activation = Activation::TANH;
+ params.cell_clip = 0.0;
+ params.proj_clip = 0.0;
+ params.time_major = true;
+ params.asymmetric_quantize_inputs = false;
+
+ // The constructor arguments are positional; their order must not be changed.
+ UnidirectionalSequenceLSTM kernel(
+ &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+ nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative test: the input tensor is built with a two-dimensional shape
+// {sequence_length, n_input} (no batch dimension) and kernel.configure() is expected to throw.
+TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_NEG)
+{
+ const int32_t n_batch = 1;
+ const int32_t n_input = 2;
+ const int32_t n_cell = 4;
+ const int32_t n_output = 4;
+ const int32_t sequence_length = 3;
+
+ std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+ Shape input_shape{sequence_length, n_input}; // this is wrong
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+ // A single FLOAT32 {n_cell, n_input} tensor; it is reused below for every non-null
+ // kernel argument other than the input, the output and the scratchpads.
+ std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+ 0.04266912, -0.15680569, -0.34856534, 0.43890524};
+ Shape input_to_input_weights_shape{n_cell, n_input};
+ Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+ UnidirectionalSequenceLSTMParams params{};
+ params.activation = Activation::TANH;
+ params.cell_clip = 0.0;
+ params.proj_clip = 0.0;
+ params.time_major = true;
+ params.asymmetric_quantize_inputs = false;
+
+ // The constructor arguments are positional; their order must not be changed.
+ UnidirectionalSequenceLSTM kernel(
+ &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+ nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative test: the input tensor has shape {sequence_length, n_batch, n_input}, but one
+// {n_cell, n_input} tensor is passed for all other non-null tensor arguments;
+// kernel.configure() is expected to throw.
+TEST_F(UnidirectionalSequenceLSTMTest, Invalid_Input_Shape_2_NEG)
+{
+ const int32_t n_batch = 1;
+ const int32_t n_input = 2;
+ const int32_t n_cell = 4;
+ const int32_t n_output = 4;
+ const int32_t sequence_length = 3;
+
+ std::vector<float> input_data{2., 3., 3., 4., 1., 1.};
+ Shape input_shape{sequence_length, n_batch, n_input};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+ // The only weight-like tensor created by this test; see the NOTE before the kernel.
+ std::vector<float> input_to_input_weights = {-0.45018822, -0.02338299, -0.0870589, -0.34550029,
+ 0.04266912, -0.15680569, -0.34856534, 0.43890524};
+ Shape input_to_input_weights_shape{n_cell, n_input};
+ Tensor input_to_input_weights_tensor = makeInputTensor<DataType::FLOAT32>(
+ input_to_input_weights_shape, input_to_input_weights, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+
+ UnidirectionalSequenceLSTMParams params{};
+ params.activation = Activation::TANH;
+ params.cell_clip = 0.0;
+ params.proj_clip = 0.0;
+ params.time_major = true;
+ params.asymmetric_quantize_inputs = false;
+
+ // NOTE provide wrong shaped inputs
+ // The constructor arguments are positional; their order must not be changed.
+ UnidirectionalSequenceLSTM kernel(
+ &input_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ nullptr, nullptr, nullptr, &input_to_input_weights_tensor, &input_to_input_weights_tensor,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr,
+ &input_to_input_weights_tensor, &input_to_input_weights_tensor, nullptr, nullptr, nullptr,
+ nullptr, &output_tensor, &scratchpad_1, &scratchpad_2, &scratchpad_3, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Builds an Unpack kernel from one input tensor, the list of output tensors and the
+// operator parameters; everything is forwarded to KernelWithParams.
+// The last parameter is a const reference (`&params`).
+Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
+  : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
+{
+}
+
+// Derives the output shape (the input shape with the unpack axis removed) and resizes
+// every output tensor to it. A negative axis counts from the last dimension.
+void Unpack::configure()
+{
+  const Shape &in_shape = input()->shape();
+  const auto rank = in_shape.num_dims();
+
+  int axis = _params.axis;
+  if (axis < 0)
+  {
+    axis += rank;
+  }
+  assert(axis >= 0 && axis < rank);
+
+  // Copy every input dimension except the one being unpacked.
+  Shape unpacked_shape(rank - 1);
+  int dst = 0;
+  for (int src = 0; src < rank; ++src)
+  {
+    if (src == axis)
+      continue;
+    unpacked_shape.dim(dst) = in_shape.dim(src);
+    ++dst;
+  }
+
+  // TODO: enable it only if kernel with dynamic shapes
+  for (Tensor *out : _outputs)
+  {
+    assert(out->element_type() == input()->element_type());
+    out->resize(unpacked_shape);
+  }
+}
+
+// Runs the TFLite reference Unpack for element type T, writing into all output tensors.
+template <typename T> void Unpack::executeImpl() const
+{
+  tflite::UnpackParams op_params{};
+  op_params.num_split = _outputs.size();
+  op_params.axis = _params.axis;
+
+  VectorOfTensors<T, false> outs(_outputs);
+  // The reference kernel takes one output shape; the first output's shape is passed.
+  const tflite::RuntimeShape &first_out_shape = *outs.shapes()[0];
+  tflite::reference_ops::Unpack<T>(op_params, getTensorShape(input()), getTensorData<T>(input()),
+                                   first_out_shape, outs.data());
+}
+
+// Dispatches to the typed implementation according to the input element type.
+// Only FLOAT32 and U8 are handled; any other type trips the assertion.
+void Unpack::execute() const
+{
+  const auto type = input()->element_type();
+  if (type == DataType::FLOAT32)
+  {
+    executeImpl<float>();
+  }
+  else if (type == DataType::U8)
+  {
+    executeImpl<uint8_t>();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Utils.h"
+
+#include <cassert>
+#include <cmath>
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Writes the [min, max] clamp range implied by a fused activation into the output
+// pointers. Unsupported activations trip the assertion and leave the outputs untouched.
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
+{
+  using Limits = std::numeric_limits<T>;
+
+  switch (activation)
+  {
+    case Activation::NONE:
+      *activation_min = Limits::lowest();
+      *activation_max = Limits::max();
+      return;
+    case Activation::RELU:
+      *activation_min = 0;
+      *activation_max = Limits::max();
+      return;
+    case Activation::RELU_N1_TO_1:
+      *activation_min = -1;
+      *activation_max = 1;
+      return;
+    case Activation::RELU6:
+      *activation_min = 0;
+      *activation_max = 6;
+      return;
+    default:
+      break;
+  }
+  assert(false && "Unsupported activation.");
+}
+
+// For each of the n_row rows of a row-major int8 matrix, sums the n_col entries,
+// multiplies the sum by `scalar` and adds the product to output[row].
+void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
+                                    int32_t n_col, int32_t *output)
+{
+  const int8_t *cursor = matrix;
+  for (int row = 0; row < n_row; ++row)
+  {
+    int32_t acc = 0;
+    for (int col = 0; col < n_col; ++col, ++cursor)
+    {
+      acc += *cursor;
+    }
+    output[row] += acc * scalar;
+  }
+}
+
+// Explicit instantiations: the template is defined in this source file, so the element
+// types used elsewhere (float, int32_t, int64_t) are instantiated here.
+template void calculateActivationRange(Activation activation, float *activation_min,
+ float *activation_max);
+template void calculateActivationRange(Activation activation, int32_t *activation_min,
+ int32_t *activation_max);
+template void calculateActivationRange(Activation activation, int64_t *activation_min,
+ int64_t *activation_max);
+
+#ifndef DIS_QUANT
+
+// Computes the quantized clamp range for a fused activation.
+// Real-valued bounds are mapped with zero_point + round(x / scale) and then clipped to
+// [qmin, qmax]. Unsupported activations trip the assertion and leave the outputs untouched.
+static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
+                                                  int32_t zero_point, float scale,
+                                                  int32_t *activation_min, int32_t *activation_max)
+{
+  const auto to_quantized = [scale, zero_point](float real_value) {
+    return zero_point + static_cast<int32_t>(std::round(real_value / scale));
+  };
+
+  switch (activation)
+  {
+    case Activation::NONE:
+    case Activation::TANH:
+      *activation_min = qmin;
+      *activation_max = qmax;
+      return;
+    case Activation::RELU:
+      *activation_min = std::max(qmin, to_quantized(0.0f));
+      *activation_max = qmax;
+      return;
+    case Activation::RELU_N1_TO_1:
+      *activation_min = std::max(qmin, to_quantized(-1.0f));
+      *activation_max = std::min(qmax, to_quantized(1.0f));
+      return;
+    case Activation::RELU6:
+      *activation_min = std::max(qmin, to_quantized(0.0f));
+      *activation_max = std::min(qmax, to_quantized(6.0f));
+      return;
+    default:
+      break;
+  }
+  assert(false && "Unsupported activation.");
+}
+
+// Tensor-based overload: reads the quantization scale and zero point from `output` and
+// forwards them to the scalar overload.
+static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
+                                                  const circle::Tensor *output,
+                                                  int32_t *activation_min, int32_t *activation_max)
+{
+  const float scale = Tensor::scale(output);
+  const int32_t zero_point = Tensor::zero_point(output);
+
+  // The scalar overload expects (..., zero_point, scale, ...); the scale must be passed
+  // here, not the zero point a second time.
+  calculateActivationRangeQuantizedImpl(activation, qmin, qmax, zero_point, scale, activation_min,
+                                        activation_max);
+}
+
+// Picks the representable integer range for `data_type` and computes the quantized
+// activation clamp range inside it. The signed types use symmetric ranges [-max, max].
+void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
+                                       float output_scale, DataType data_type,
+                                       int32_t *activation_min, int32_t *activation_max)
+{
+  int32_t range_lo = 0;
+  int32_t range_hi = 0;
+
+  if (data_type == DataType::U8)
+  {
+    range_lo = 0;
+    range_hi = std::numeric_limits<uint8_t>::max();
+  }
+  else if (data_type == DataType::S8)
+  {
+    range_lo = -std::numeric_limits<int8_t>::max();
+    range_hi = std::numeric_limits<int8_t>::max();
+  }
+  else if (data_type == DataType::S16)
+  {
+    // For now, assume that signed int16 type implies signed symmetric quantization.
+    assert(output_zero_point == 0);
+    range_lo = -std::numeric_limits<int16_t>::max();
+    range_hi = std::numeric_limits<int16_t>::max();
+  }
+  else
+  {
+    assert(false && "Unsupported type.");
+  }
+
+  calculateActivationRangeQuantizedImpl(activation, range_lo, range_hi, output_zero_point,
+                                        output_scale, activation_min, activation_max);
+}
+
+// Convenience overload: takes the scale, zero point and element type from `output`.
+// Asserts that the tensor carries exactly one zero point.
+void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
+                                       int32_t *activation_min, int32_t *activation_max)
+{
+  assert(Tensor::zero_points(output).size() == 1);
+
+  const float out_scale = Tensor::scale(output);
+  const int32_t out_zero_point = Tensor::zero_point(output);
+  const auto out_type = Tensor::element_type(output);
+
+  calculateActivationRangeQuantized(activation, out_zero_point, out_scale, out_type,
+                                    activation_min, activation_max);
+}
+
+// Splits a real multiplier into a Q0.31 fixed-point significand (*quantized_multiplier)
+// and a power-of-two exponent (*shift). A zero multiplier yields (0, 0).
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+  if (double_multiplier == 0.0)
+  {
+    *quantized_multiplier = 0;
+    *shift = 0;
+    return;
+  }
+
+  const int64_t one_q31 = int64_t(1) << 31;
+
+  const double significand = std::frexp(double_multiplier, shift);
+  int64_t fixed = static_cast<int64_t>(std::round(significand * one_q31));
+
+  // Rounding may push the significand up to exactly 1.0; renormalise to 0.5 * 2^(shift + 1).
+  if (fixed == one_q31)
+  {
+    fixed /= 2;
+    ++*shift;
+  }
+  assert(fixed <= std::numeric_limits<int32_t>::max());
+
+  // A shift amount smaller than -31 would cause all bits to be shifted out
+  // and thus all results would be zero. That is implemented with fixed == 0
+  // instead, to avoid right-shift operations with shift amounts greater than 31.
+  // This happens roughly when abs(double_multiplier) < 2^-31, i.e. tiny
+  // multipliers are flushed to zero ("flush denormals to zero"). Nonzero
+  // denormals could be generated here if a need arises.
+  if (*shift < -31)
+  {
+    *shift = 0;
+    fixed = 0;
+  }
+
+  *quantized_multiplier = static_cast<int32_t>(fixed);
+}
+
+// Variant of quantizeMultiplier() for multipliers strictly between 0 and 1.
+// The resulting exponent is asserted to be non-positive and is stored in *left_shift.
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+                                         int *left_shift)
+{
+  assert(double_multiplier < 1.0);
+  assert(double_multiplier > 0.0);
+
+  int exponent;
+  quantizeMultiplier(double_multiplier, quantized_multiplier, &exponent);
+
+  assert(exponent <= 0);
+  *left_shift = exponent;
+}
+#endif
+
+// Computes the broadcast result shape of two tensors. Dimensions are aligned from the
+// last one backwards and missing leading dimensions count as 1. Each aligned pair must
+// be equal or contain a 1, which is enforced with LUCI_INTERPRETER_CHECK.
+tflite::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
+                                                const circle::Tensor *input2)
+{
+  const int rank1 = Tensor::num_dims(input1);
+  const int rank2 = Tensor::num_dims(input2);
+  const int out_rank = std::max(rank1, rank2);
+  tflite::RuntimeShape result(out_rank);
+
+  // `back` counts positions from the trailing dimension.
+  for (int back = 0; back < out_rank; ++back)
+  {
+    int32_t dim1 = 1;
+    if (back < rank1)
+      dim1 = Tensor::dim(input1, rank1 - back - 1);
+
+    int32_t dim2 = 1;
+    if (back < rank2)
+      dim2 = Tensor::dim(input2, rank2 - back - 1);
+
+    const bool compatible = dim1 == dim2 || dim1 == 1 || dim2 == 1;
+    LUCI_INTERPRETER_CHECK(compatible);
+
+    result.SetDim(out_rank - back - 1, std::max(dim1, dim2));
+  }
+
+  return result;
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
+#define LUCI_INTERPRETER_KERNELS_UTILS_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <tensorflow/lite/kernels/internal/types.h>
+#include <cassert>
+#include <cstdint>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+using Activation = luci_interpreter::FusedActFunc;
+
+// Runtime check: when `cond` is false, assert(false) is triggered with a fixed message.
+// Because it is built on assert(), the check has no effect when NDEBUG is defined.
+// NOTE(review): the macro expands to a bare `if` statement, so placing it directly
+// before an `else` would bind that `else` to the macro's `if`.
+#define LUCI_INTERPRETER_CHECK(cond) \
+ if (!(cond)) \
+ { \
+ assert(false && "LUCI_INTERPRETER_CHECK fails"); \
+ }
+
+// Returns the leading padding for one spatial dimension: half of the total padding
+// needed to produce `out_size` outputs (integer division), never less than zero.
+inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+                              int32_t filter_size, int32_t out_size)
+{
+  const int32_t dilated_filter = (filter_size - 1) * dilation_rate + 1;
+  const int32_t required = (out_size - 1) * stride + dilated_filter - in_size;
+  const int32_t half = required / 2;
+  if (half > 0)
+  {
+    return half;
+  }
+  return 0;
+}
+
+// Like computePadding(), but clamps the total padding at zero first and reports its
+// remainder modulo 2 through *offset. Returns the total padding divided by two.
+inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
+                                        int32_t filter_size, int32_t out_size, int32_t *offset)
+{
+  const int32_t dilated_filter = (filter_size - 1) * dilation_rate + 1;
+  const int32_t required = (out_size - 1) * stride + dilated_filter - in_size;
+
+  // No padding needed: both the offset and the padding are zero.
+  if (required <= 0)
+  {
+    *offset = 0;
+    return 0;
+  }
+
+  *offset = required % 2;
+  return required / 2;
+}
+
+// Returns the output extent of one spatial dimension for the given padding mode.
+// SAME: ceil(image_size / stride). VALID: the dilated filter must fit inside the image.
+// Any other padding value trips the assertion and yields 0.
+inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
+                                 int32_t stride, int32_t dilation_rate = 1)
+{
+  const int32_t dilated_filter = (filter_size - 1) * dilation_rate + 1;
+
+  if (padding == Padding::SAME)
+  {
+    return (image_size + stride - 1) / stride;
+  }
+  if (padding == Padding::VALID)
+  {
+    return (image_size + stride - dilated_filter) / stride;
+  }
+
+  assert(false);
+  return 0;
+}
+
+// Flattens the 4-D index (d0, d1, d2, d3) into a linear offset using the sizes of
+// dimensions 1, 2 and 3 of `tensor`.
+inline int32_t calcOffset(const circle::Tensor *tensor, int32_t d0, int32_t d1, int32_t d2,
+                          int32_t d3)
+{
+  const auto size1 = Tensor::dim(tensor, 1);
+  const auto size2 = Tensor::dim(tensor, 2);
+  const auto size3 = Tensor::dim(tensor, 3);
+
+  return ((d0 * size1 + d1) * size2 + d2) * size3 + d3;
+}
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
+
+tflite::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
+ const circle::Tensor *input2);
+
+// Read-only view over a vector that hides broadcasting: a one-element vector returns
+// that element for every index, any other vector is indexed normally.
+// The wrapper stores a reference, so the vector must outlive it.
+template <typename T> class BroadcastableWrapper
+{
+public:
+  BroadcastableWrapper(const std::vector<T> &v) : _values(v), _step(v.size() == 1 ? 0 : 1) {}
+
+  T operator[](int idx)
+  {
+    // _step is 0 for a single-element vector, so every index maps to element 0.
+    return _values[idx * _step];
+  }
+
+private:
+  const std::vector<T> &_values;
+  int _step;
+};
+
+// Converts a circle tensor's dimensions into a tflite::RuntimeShape.
+// A null tensor produces a default-constructed shape.
+inline tflite::RuntimeShape getTensorShape(const circle::Tensor *tensor)
+{
+  if (tensor == nullptr)
+  {
+    return tflite::RuntimeShape();
+  }
+
+  const int rank = Tensor::num_dims(tensor);
+  tflite::RuntimeShape shape(rank);
+  for (int axis = 0; axis < rank; ++axis)
+  {
+    shape.SetDim(axis, Tensor::dim(tensor, axis));
+  }
+  return shape;
+}
+
+// Reinterprets a constant raw byte buffer as a pointer to const T; null stays null.
+template <typename T> const T *getTensorData(const uint8_t *tensor_data)
+{
+  if (tensor_data == nullptr)
+  {
+    return nullptr;
+  }
+  return reinterpret_cast<const T *>(tensor_data);
+}
+
+// Reinterprets a mutable raw byte buffer as a pointer to T; null stays null.
+template <typename T> T *getTensorData(uint8_t *tensor_data)
+{
+  if (tensor_data == nullptr)
+  {
+    return nullptr;
+  }
+  return reinterpret_cast<T *>(tensor_data);
+}
+
+// A list of tensors in a format that can be used by kernels like split and
+// concatenation.
+//
+// For every tensor in the list this class collects a data pointer and a RuntimeShape
+// and exposes both as contiguous arrays of pointers via data() and shapes().
+// `is_const` selects whether the element and tensor types are const-qualified.
+template <typename T, bool is_const> class VectorOfTensors
+{
+public:
+ using ElementT = typename std::conditional<is_const, const T, T>::type;
+ using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
+ {
+ const int num_tensors = tensor_list.size();
+
+ // Reserve up front so the vectors are sized once for all tensors.
+ all_data_.reserve(num_tensors);
+ all_shape_.reserve(num_tensors);
+ all_shape_ptr_.reserve(num_tensors);
+
+ for (TensorT *tensor : tensor_list)
+ {
+ all_data_.push_back(getTensorData<T>(tensor));
+ all_shape_.push_back(getTensorShape(tensor));
+ }
+
+ // Taking the pointer from inside a std::vector is only OK if the vector is
+ // never modified, so we populate all_shape in the previous loop and then we
+ // are free to grab iterators here.
+ for (tflite::RuntimeShape &shape : all_shape_)
+ {
+ all_shape_ptr_.push_back(&shape);
+ }
+ }
+ // Return a pointer to the data pointers of all tensors in the list. For
+ // example:
+ // float* const* f = v.data();
+ // f[0][1] is the second element of the first tensor.
+ ElementT *const *data() const { return all_data_.data(); }
+
+ // Return a pointer to the shape pointers of all tensors in the list. For
+ // example:
+ // const RuntimeShape* const* d = v.shapes();
+ // d[1] is the shape of the second tensor in the list.
+ const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }
+
+private:
+ // Data pointer of each tensor, in list order.
+ std::vector<ElementT *> all_data_;
+ // Shape of each tensor, in list order; owns the objects all_shape_ptr_ points to.
+ std::vector<tflite::RuntimeShape> all_shape_;
+ // Pointers into all_shape_, in the same order.
+ std::vector<tflite::RuntimeShape *> all_shape_ptr_;
+};
+
+#ifndef DIS_QUANT
+void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
+ int32_t *activation_min, int32_t *activation_max);
+void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
+ float output_scale, DataType data_type,
+ int32_t *activation_min, int32_t *activation_max);
+
+// Base case: an empty candidate list never contains T.
+template <typename T> constexpr bool one_of_types() { return false; }
+
+// Returns true when T is the same type as U or as any type in Other...
+template <typename T, typename U, typename... Other> constexpr bool one_of_types()
+{
+  return std::is_same<T, U>::value ? true : one_of_types<T, Other...>();
+}
+
+void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
+ int32_t n_col, int32_t *output);
+
+/**
+ * Fills activation min and max parameters depending on given data type and activation
+ *
+ * Exactly one pair of fields is written: the float pair for float, the quantized pair
+ * for int32_t and the int64 pair for int64_t. T is a template parameter, so the
+ * conditions are compile-time constants.
+ *
+ * @tparam T data type of arithmetic operation output tensor (float, int32_t or int64_t)
+ * @param p tflite params to fill
+ * @param act luci_interpreter::Activation of arithmetic operation
+ */
+template <typename T>
+void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
+{
+  static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
+
+  // This has to be one if / else-if / else chain: with a separate `if` for int32_t,
+  // the float case would also reach the final int64 branch.
+  if (std::is_same<T, float>::value)
+    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
+  else if (std::is_same<T, int32_t>::value)
+    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
+  else
+    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
+}
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of its exponent.
+//
+// Handles an arbitrary positive multiplier. The 'shift' output-value is
+// basically the 'floating-point exponent' of the multiplier:
+// Negative for a right-shift (when the multiplier is <1), positive for a
+// left-shift (when the multiplier is >1)
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and its exponent, which is stored un-negated in *left_shift
+// and is asserted to be <= 0.
+//
+// Restricted to the case where 0 < multiplier < 1 (both bounds are asserted).
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
+
+// Returns (input_scale * filter_scale) / output_scale as a double.
+// The product is formed in float precision and checked to be non-negative.
+inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
+                                               float output_scale)
+{
+  const float product_f = input_scale * filter_scale;
+  const double product = static_cast<double>(product_f);
+  LUCI_INTERPRETER_CHECK(product >= 0);
+
+  const double divisor = static_cast<double>(output_scale);
+  return product / divisor;
+}
+
+// TODO rename getQuantizedConvolutionMultiplers to something more general
+// it is used for non conv operators too
+//
+// Per-channel variant: returns one effective multiplier for every entry of
+// `filter_scale`, in the same order.
+inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
+                                                             const std::vector<float> &filter_scale,
+                                                             float output_scale)
+{
+  std::vector<double> multipliers;
+  multipliers.reserve(filter_scale.size());
+  for (const float channel_scale : filter_scale)
+  {
+    multipliers.push_back(
+      getQuantizedConvolutionMultipler(input_scale, channel_scale, output_scale));
+  }
+  return multipliers;
+}
+
+// Fixed-point multiplier and shift for one channel; both fields are filled by
+// quantizeMultiplier() in quantizeMultipliers().
+struct ChannelQuantMultipliers
+{
+ int shift;
+ int32_t multiplier;
+ ChannelQuantMultipliers() = default;
+};
+
+// Converts every effective scale into a fixed-point multiplier/shift pair using
+// quantizeMultiplier(). The result has one entry per input scale, in the same order.
+inline std::vector<ChannelQuantMultipliers>
+quantizeMultipliers(const std::vector<double> &effective_scale)
+{
+  size_t n = effective_scale.size();
+  std::vector<ChannelQuantMultipliers> params(n);
+  for (size_t i = 0; i < n; ++i)
+  {
+    // quantizeMultiplier() writes through the addresses of the two fields.
+    quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
+  }
+  return params;
+}
+
+// A list of quantized tensors in a format that can be used by kernels like
+// split and concatenation.
+//
+// Extends VectorOfTensors<uint8_t, is_const> with the zero point and scale of each
+// tensor, collected in list order.
+template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
+{
+public:
+ using typename VectorOfTensors<uint8_t, is_const>::TensorT;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
+ : VectorOfTensors<uint8_t, is_const>(tensor_list)
+ {
+ for (TensorT *tensor : tensor_list)
+ {
+ zero_point_.push_back(tensor->zero_point());
+ scale_.push_back(tensor->scale());
+ }
+ }
+
+ // Return a pointer to the scales of all tensors, in list order.
+ const float *scale() const { return scale_.data(); }
+ // Return a pointer to the zero points of all tensors, in list order.
+ const int32_t *zero_point() const { return zero_point_.data(); }
+
+private:
+ std::vector<int32_t> zero_point_;
+ std::vector<float> scale_;
+};
+#endif // DIS_QUANT
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UTILS_H
--- /dev/null
+# Source files of the loader library.
+set(SOURCES
+ GraphLoader.h
+ GraphLoader.cpp
+ ModuleLoader.h
+ ModuleLoader.cpp)
+
+# Static library; built as position-independent code unless NNCC_LIBRARY_NO_PIC is set.
+add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+# The interpreter source directory is a public include path.
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+
+# Memory manager and core are linked publicly; the kernels library is linked privately.
+target_link_libraries(${LUCI_INTERPRETER_LOADER}
+ PUBLIC ${LUCI_INTERPRETER_MEMORY_MANAGER} ${LUCI_INTERPRETER_CORE}
+ PRIVATE ${LUCI_INTERPRETER_KERNELS})
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+
+namespace luci_interpreter
+{
+namespace
+{
+
+// TODO: add more operations
+// Returns true for the operators treated as in-place candidates:
+// LOGISTIC, RESHAPE and EXPAND_DIMS.
+bool isCouldBeEmplaceOperation(circle::BuiltinOperator op)
+{
+  const bool is_candidate = op == circle::BuiltinOperator_LOGISTIC ||
+                            op == circle::BuiltinOperator_RESHAPE ||
+                            op == circle::BuiltinOperator_EXPAND_DIMS;
+  return is_candidate;
+}
+
+// Returns false as soon as `tensor_index` is found a second time among the inputs of
+// the reader's operators; returns true when it appears at most once.
+bool isCouldBeEmplaceTensor(CircleReader *reader, const int32_t tensor_index)
+{
+  bool already_used = false;
+  for (uint32_t op_idx = 0; op_idx < reader->operators().size(); ++op_idx)
+  {
+    const auto op = reader->operators().at(op_idx);
+    assert(op != nullptr);
+
+    for (int32_t in_idx = 0; in_idx < op->inputs()->size(); ++in_idx)
+    {
+      if (op->inputs()->operator[](in_idx) != tensor_index)
+        continue;
+
+      // Second use found: the tensor has more than one consumer.
+      if (already_used)
+        return false;
+      already_used = true;
+    }
+  }
+  return true;
+}
+
+} // namespace
+
+// Scans all operators of the reader's current subgraph and registers in-place
+// candidates in `runtime_graph` via addInplaceOpIndex().
+//
+// An operator index is registered while visiting an input when all of these hold:
+//  - no input visited so far (including this one) is a graph input,
+//  - the operator's builtin code is accepted by isCouldBeEmplaceOperation(),
+//  - the operator has exactly one output,
+//  - the visited input tensor passes isCouldBeEmplaceTensor().
+void GraphLoader::checkInplaceOps(CircleReader *reader, RuntimeGraph *runtime_graph)
+{
+ for (uint32_t i = 0; i < reader->operators().size(); ++i)
+ {
+ const auto *op = reader->operators().at(i);
+ assert(op != nullptr);
+
+ // Sticky flag: once one input is a graph input it stays true for the remaining inputs.
+ bool is_graph_input = false;
+ for (int32_t j = 0; j < op->inputs()->size(); ++j)
+ {
+ const auto input_index = op->inputs()->operator[](j);
+ // Index -1 is skipped (presumably an absent optional input; TODO confirm).
+ if (input_index == -1)
+ continue;
+
+ const auto &inputs_indexes = reader->inputs();
+
+ is_graph_input = (std::find(inputs_indexes.begin(), inputs_indexes.end(), input_index) !=
+ inputs_indexes.end()) or
+ is_graph_input;
+
+ // NOTE(review): this can be reached once per qualifying input, so the same operator
+ // index may be added more than once, and inputs visited before a graph input are
+ // not re-checked. Confirm that addInplaceOpIndex() tolerates this.
+ if (not is_graph_input and isCouldBeEmplaceOperation(reader->builtin_code(op)) and
+ op->outputs()->size() == 1 and isCouldBeEmplaceTensor(reader, input_index))
+ {
+ runtime_graph->addInplaceOpIndex(i);
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+#define LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+
+#include "core/RuntimeGraph.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+// Static helper that inspects a parsed circle graph on behalf of a RuntimeGraph.
+class GraphLoader
+{
+public:
+ // Registers in `runtime_graph` the indices of operators found to be in-place candidates.
+ static void checkInplaceOps(CircleReader *reader, RuntimeGraph *runtime_graph);
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleLoader.h"
+
+#include "GraphLoader.h"
+
+namespace luci_interpreter
+{
+
+// Loads a circle model from a raw buffer into `runtime_module`:
+//  1. parses the model with the module's CircleReader,
+//  2. adds one runtime graph per subgraph,
+//  3. unless USE_STATIC_ALLOC is defined, runs the in-place operator check per subgraph,
+//  4. configures every runtime graph.
+// Parse and subgraph-selection failures are reported through assert() only, so they are
+// not detected when NDEBUG is defined.
+void ModuleLoader::load(RuntimeModule *runtime_module, SimpleMemoryManager *memory_manager,
+ const char *model_data_raw)
+{
+ const circle::Model *model = circle::GetModel(model_data_raw);
+
+ CircleReader &reader = runtime_module->getCircleReader();
+ if (!reader.parse(model))
+ assert(false && "Error during parse");
+
+ for (size_t i = 0; i < reader.num_subgraph(); ++i)
+ {
+ runtime_module->addGraph(memory_manager);
+ }
+
+#ifndef USE_STATIC_ALLOC
+ for (size_t i = 0; i < reader.num_subgraph(); ++i)
+ {
+ // The reader has to point at subgraph i before its operators are inspected.
+ if (!reader.select_subgraph(i))
+ assert(false && "Error during select subgraph");
+ auto *runtime_graph = runtime_module->getRuntimeGraphAt(i);
+ // For Dynamic memory manager we can use inplace optimization
+ GraphLoader::checkInplaceOps(&reader, runtime_graph);
+ }
+#endif // USE_STATIC_ALLOC
+
+ // For Dynamic Memory manager we build memory allocate/deallocate plan and then configure kernels.
+ // For Static Memory manager we only configure kernels.
+ for (size_t i = 0; i < reader.num_subgraph(); ++i)
+ {
+ auto *runtime_graph = runtime_module->getRuntimeGraphAt(i);
+#ifdef USE_STATIC_ALLOC
+ runtime_graph->configure_kernels();
+#else
+ runtime_graph->configure();
+#endif // USE_STATIC_ALLOC
+ }
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_MODULELOADER_H
+#define LUCI_INTERPRETER_LOADER_MODULELOADER_H
+
+#include "core/RuntimeModule.h"
+#include "luci_interpreter/core/reader/CircleMicroReader.h"
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+// Entry point for loading a Circle model into a RuntimeModule; exposes a single static function.
+class ModuleLoader
+{
+public:
+ // Loads the model found in the raw buffer `model_data_raw` into `runtime_module`.
+ // `memory_manager` is passed to the runtime graphs that are added while loading.
+ static void load(RuntimeModule *runtime_module, MemoryManager *memory_manager,
+ const char *model_data_raw);
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_MODULELOADER_H
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Add.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Add kernel for the operator stored at `op_index`.
+//
+// inputs  - exactly two operand tensors (checked with assert)
+// outputs - the result tensor is taken from slot 0
+// The fused activation comes from the operator's AddOptions, read through the builder's reader.
+std::unique_ptr<Kernel> build_kernel_CircleAdd(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *const lhs = inputs.at(0);
+  const Tensor *const rhs = inputs.at(1);
+  Tensor *const result = outputs.at(0);
+
+  // Unpack the operator so that its builtin options can be queried.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *const add_options = unpacked_op.builtin_options.AsAddOptions();
+
+  AddParams kernel_params{};
+  kernel_params.activation = luci_actfunc(add_options->fused_activation_function);
+
+  return std::make_unique<kernels::Add>(lhs, rhs, result, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ArgMax.h"
+
+namespace luci_interpreter
+{
+
+// Creates the ArgMax kernel for the operator stored at `op_index`.
+//
+// inputs  - two tensors (checked with assert): the data tensor, then the axis tensor
+// outputs - the result tensor is taken from slot 0
+// The output element type comes from the operator's ArgMaxOptions.
+std::unique_ptr<Kernel> build_kernel_CircleArgMax(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *const data = inputs.at(0);
+  const Tensor *const axis_tensor = inputs.at(1);
+  Tensor *const result = outputs.at(0);
+
+  // Unpack the operator so that its builtin options can be queried.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *const argmax_options = unpacked_op.builtin_options.AsArgMaxOptions();
+
+  ArgMaxParams kernel_params{};
+  // NOTE(review): this cast carries the numeric value of the circle enum straight into DataType,
+  // which assumes both enums use the same numbering - confirm against their definitions.
+  kernel_params.output_type = static_cast<DataType>(argmax_options->output_type);
+
+  return std::make_unique<kernels::ArgMax>(data, axis_tensor, result, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/AveragePool2D.h"
+
+namespace luci_interpreter
+{
+
+// Creates the AveragePool2D kernel for the operator stored at `op_index`.
+//
+// inputs  - exactly one tensor (checked with assert)
+// outputs - the result tensor is taken from slot 0
+// Padding, filter size, strides and fused activation come from the operator's Pool2DOptions.
+// A scratchpad tensor is registered with the builder's runtime graph and handed to the kernel.
+std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(std::vector<const Tensor *> &&inputs,
+                                                         std::vector<Tensor *> &&outputs,
+                                                         const uint32_t op_index,
+                                                         KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *const source = inputs.at(0);
+  Tensor *const result = outputs.at(0);
+
+  // Unpack the operator so that its builtin options can be queried.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *const pool_options = unpacked_op.builtin_options.AsPool2DOptions();
+
+  Pool2DParams kernel_params{};
+  kernel_params.padding = luci_padding(pool_options->padding);
+  kernel_params.filter_height = pool_options->filter_height;
+  kernel_params.filter_width = pool_options->filter_width;
+  kernel_params.stride_height = pool_options->stride_h;
+  kernel_params.stride_width = pool_options->stride_w;
+  kernel_params.activation = luci_actfunc(pool_options->fused_activation_function);
+
+  // The contents of the scratchpad are not known at this point, so the most general
+  // element type (UINT8) is used for it.
+  auto scratch_owner = std::make_unique<Tensor>(DataType::U8, Shape({}), nullptr);
+  scratch_owner->set_data_buffer(nullptr);
+  // TODO move tensors offset initialization to one place
+  // TODO handle with static manager
+  Tensor *const scratch = builder.get_runtime_graph()->addTensor(std::move(scratch_owner));
+
+  return std::make_unique<kernels::AveragePool2D>(source, result, scratch, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchMatMul.h"
+
+namespace luci_interpreter
+{
+
+// Creates the BatchMatMul kernel for the operator stored at `op_index`.
+//
+// inputs  - exactly two tensors (checked with assert): left-hand side, then right-hand side
+// outputs - the result tensor is taken from slot 0
+// One scratchpad tensor per operand (using that operand's element type) is registered with the
+// builder's runtime graph and handed to the kernel. The adjoint flags come from the operator's
+// BatchMatMulOptions.
+std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(std::vector<const Tensor *> &&inputs,
+                                                       std::vector<Tensor *> &&outputs,
+                                                       const uint32_t op_index,
+                                                       KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *lhs = inputs.at(0);
+  const Tensor *rhs = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  auto lhs_scratchpad = std::make_unique<Tensor>(lhs->element_type(), Shape({}), nullptr);
+  lhs_scratchpad->set_data_buffer(nullptr);
+  auto rhs_scratchpad = std::make_unique<Tensor>(rhs->element_type(), Shape({}), nullptr);
+  rhs_scratchpad->set_data_buffer(nullptr);
+  // TODO move tensors offset initialization to one place
+  // TODO handle with StaticManager
+  Tensor *lhs_tmp = builder.get_runtime_graph()->addTensor(std::move(lhs_scratchpad));
+  Tensor *rhs_tmp = builder.get_runtime_graph()->addTensor(std::move(rhs_scratchpad));
+
+  // Unpack the operator so that its builtin options can be queried.
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsBatchMatMulOptions();
+
+  // Value-initialize like the other builders do, so no member is left indeterminate.
+  BatchMatMulParams params{};
+  params.adj_x = options->adjoint_lhs;
+  params.adj_y = options->adjoint_rhs;
+
+  return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchToSpaceND.h"
+
+namespace luci_interpreter
+{
+
+// Creates the BatchToSpaceND kernel.
+//
+// inputs  - exactly three tensors (checked with assert): data, block shape, crops
+// outputs - the result tensor is taken from slot 0
+// `op_index` and `builder` are not read by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(std::vector<const Tensor *> &&inputs,
+                                                          std::vector<Tensor *> &&outputs,
+                                                          const uint32_t op_index,
+                                                          KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *const data = inputs.at(0);
+  const Tensor *const block_shape_tensor = inputs.at(1);
+  const Tensor *const crops_tensor = inputs.at(2);
+  Tensor *const result = outputs.at(0);
+
+  return std::make_unique<kernels::BatchToSpaceND>(data, block_shape_tensor, crops_tensor,
+                                                   result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Cast.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Cast kernel.
+//
+// inputs  - exactly one tensor (checked with assert)
+// outputs - the result tensor is taken from slot 0
+// `op_index` and `builder` are not read by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleCast(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *const source = inputs.at(0);
+  Tensor *const result = outputs.at(0);
+
+  return std::make_unique<kernels::Cast>(source, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Concatenation.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Concatenation kernel for the operator stored at `op_index`.
+//
+// inputs  - all tensors to concatenate; the count is not checked here
+// outputs - the result tensor is taken from slot 0
+// Axis and fused activation come from the operator's ConcatenationOptions.
+//
+// `inputs` arrives as an rvalue reference, so the vector is moved into the kernel directly
+// instead of being copied element by element into a second vector first.
+std::unique_ptr<Kernel> build_kernel_CircleConcatenation(std::vector<const Tensor *> &&inputs,
+                                                         std::vector<Tensor *> &&outputs,
+                                                         const uint32_t op_index,
+                                                         KernelBuilder &builder)
+{
+  Tensor *output = outputs.at(0);
+
+  // Unpack the operator so that its builtin options can be queried.
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsConcatenationOptions();
+
+  ConcatenationParams params{};
+  params.axis = options->axis;
+  params.activation = luci_actfunc(options->fused_activation_function);
+
+  return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Conv2D.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Conv2D kernel for the operator stored at `op_index`.
+//
+// inputs  - exactly three tensors (checked with assert): data, filter, bias
+// outputs - the result tensor is taken from slot 0
+// A scratchpad tensor is registered with the builder's runtime graph and handed to the kernel.
+// Padding, strides, dilation factors and fused activation come from the operator's
+// Conv2DOptions.
+std::unique_ptr<Kernel> build_kernel_CircleConv2D(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *const data = inputs.at(0);
+  const Tensor *const filter_tensor = inputs.at(1);
+  const Tensor *const bias_tensor = inputs.at(2);
+  Tensor *const result = outputs.at(0);
+
+  // The contents of the scratchpad are not known at this point, so the most general
+  // element type (UINT8) is used for it.
+  auto scratch_owner = std::make_unique<Tensor>(DataType::U8, Shape({}), nullptr);
+  scratch_owner->set_data_buffer(nullptr);
+  // TODO move tensors offset initialization to one place
+  // TODO handle with StaticManager
+  Tensor *const scratch = builder.get_runtime_graph()->addTensor(std::move(scratch_owner));
+
+  // Unpack the operator so that its builtin options can be queried.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *const conv_options = unpacked_op.builtin_options.AsConv2DOptions();
+
+  Conv2DParams kernel_params{};
+  kernel_params.padding = luci_padding(conv_options->padding);
+  kernel_params.stride_height = conv_options->stride_h;
+  kernel_params.stride_width = conv_options->stride_w;
+  kernel_params.dilation_height_factor = conv_options->dilation_h_factor;
+  kernel_params.dilation_width_factor = conv_options->dilation_w_factor;
+  kernel_params.activation = luci_actfunc(conv_options->fused_activation_function);
+
+  return std::make_unique<kernels::Conv2D>(data, filter_tensor, bias_tensor, result, scratch,
+                                           kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthToSpace.h"
+
+namespace luci_interpreter
+{
+
+// Creates the DepthToSpace kernel for the operator stored at `op_index`.
+//
+// inputs  - exactly one tensor (checked with assert)
+// outputs - the result tensor is taken from slot 0
+// The block size comes from the operator's DepthToSpaceOptions.
+std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(std::vector<const Tensor *> &&inputs,
+                                                        std::vector<Tensor *> &&outputs,
+                                                        const uint32_t op_index,
+                                                        KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *const source = inputs.at(0);
+  Tensor *const result = outputs.at(0);
+
+  // Unpack the operator so that its builtin options can be queried.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *const d2s_options = unpacked_op.builtin_options.AsDepthToSpaceOptions();
+
+  DepthToSpaceParams kernel_params{};
+  kernel_params.block_size = d2s_options->block_size;
+
+  return std::make_unique<kernels::DepthToSpace>(source, result, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthwiseConv2D.h"
+
+namespace luci_interpreter
+{
+
+// Creates the DepthwiseConv2D kernel for the operator stored at `op_index`.
+//
+// inputs  - exactly three tensors (checked with assert): data, filter, bias
+// outputs - the result tensor is taken from slot 0
+// Padding, depth multiplier, strides, dilation factors and fused activation come from the
+// operator's DepthwiseConv2DOptions. A scratchpad tensor is registered with the builder's
+// runtime graph and handed to the kernel.
+std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(std::vector<const Tensor *> &&inputs,
+                                                           std::vector<Tensor *> &&outputs,
+                                                           const uint32_t op_index,
+                                                           KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *const data = inputs.at(0);
+  const Tensor *const filter_tensor = inputs.at(1);
+  const Tensor *const bias_tensor = inputs.at(2);
+  Tensor *const result = outputs.at(0);
+
+  // Unpack the operator so that its builtin options can be queried.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *const dw_options = unpacked_op.builtin_options.AsDepthwiseConv2DOptions();
+
+  DepthwiseConv2DParams kernel_params{};
+  kernel_params.padding = luci_padding(dw_options->padding);
+  kernel_params.depth_multiplier = dw_options->depth_multiplier;
+  kernel_params.stride_height = dw_options->stride_h;
+  kernel_params.stride_width = dw_options->stride_w;
+  kernel_params.dilation_height_factor = dw_options->dilation_h_factor;
+  kernel_params.dilation_width_factor = dw_options->dilation_w_factor;
+  kernel_params.activation = luci_actfunc(dw_options->fused_activation_function);
+
+  // The contents of the scratchpad are not known at this point, so the most general
+  // element type (UINT8) is used for it.
+  auto scratch_owner = std::make_unique<Tensor>(DataType::U8, Shape({}), nullptr);
+  scratch_owner->set_data_buffer(nullptr);
+  // TODO move tensors offset initialization to one place
+  // TODO handle with StaticManager
+  Tensor *const scratch = builder.get_runtime_graph()->addTensor(std::move(scratch_owner));
+
+  return std::make_unique<kernels::DepthwiseConv2D>(data, filter_tensor, bias_tensor, result,
+                                                    scratch, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Dequantize.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Dequantize kernel.
+//
+// inputs  - exactly one tensor (checked with assert, like the other single-input builders)
+// outputs - the result tensor is taken from slot 0
+// `op_index` and `builder` are not read by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleDequantize(std::vector<const Tensor *> &&inputs,
+                                                      std::vector<Tensor *> &&outputs,
+                                                      const uint32_t op_index,
+                                                      KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  return std::make_unique<kernels::Dequantize>(input, output);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Div.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Div kernel for the operator stored at `op_index`.
+//
+// inputs  - exactly two operand tensors (checked with assert)
+// outputs - the result tensor is taken from slot 0
+// The fused activation comes from the operator's DivOptions.
+std::unique_ptr<Kernel> build_kernel_CircleDiv(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *const numerator = inputs.at(0);
+  const Tensor *const denominator = inputs.at(1);
+  Tensor *const result = outputs.at(0);
+
+  // Unpack the operator so that its builtin options can be queried.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *const div_options = unpacked_op.builtin_options.AsDivOptions();
+
+  DivParams kernel_params{};
+  kernel_params.activation = luci_actfunc(div_options->fused_activation_function);
+
+  return std::make_unique<kernels::Div>(numerator, denominator, result, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Elu.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Elu kernel.
+//
+// inputs  - exactly one tensor (checked with assert)
+// outputs - the result tensor is taken from slot 0
+// `op_index` and `builder` are not read by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleElu(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *const source = inputs.at(0);
+  Tensor *const result = outputs.at(0);
+
+  return std::make_unique<kernels::Elu>(source, result);
+}
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Equal.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Equal kernel.
+//
+// inputs  - exactly two operand tensors (checked with assert)
+// outputs - the result tensor is taken from slot 0
+// `op_index` and `builder` are not read by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleEqual(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *const lhs = inputs.at(0);
+  const Tensor *const rhs = inputs.at(1);
+  Tensor *const result = outputs.at(0);
+
+  return std::make_unique<kernels::Equal>(lhs, rhs, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Exp.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Exp kernel.
+//
+// inputs  - exactly one tensor (checked with assert)
+// outputs - the result tensor is taken from slot 0
+// `op_index` and `builder` are not read by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleExp(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *const source = inputs.at(0);
+  Tensor *const result = outputs.at(0);
+
+  return std::make_unique<kernels::Exp>(source, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ExpandDims.h"
+
+namespace luci_interpreter
+{
+
+// Creates the ExpandDims kernel.
+//
+// inputs  - exactly two tensors (checked with assert): data, then axis
+// outputs - the result tensor is taken from slot 0
+// `op_index` and `builder` are not read by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleExpandDims(std::vector<const Tensor *> &&inputs,
+                                                      std::vector<Tensor *> &&outputs,
+                                                      const uint32_t op_index,
+                                                      KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *const data = inputs.at(0);
+  const Tensor *const axis_tensor = inputs.at(1);
+  Tensor *const result = outputs.at(0);
+
+  return std::make_unique<kernels::ExpandDims>(data, axis_tensor, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Fill.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Fill kernel.
+//
+// inputs  - exactly two tensors (checked with assert): dims, then value
+// outputs - the result tensor is taken from slot 0
+// `op_index` and `builder` are not read by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleFill(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *const dims_tensor = inputs.at(0);
+  const Tensor *const value_tensor = inputs.at(1);
+  Tensor *const result = outputs.at(0);
+
+  return std::make_unique<kernels::Fill>(dims_tensor, value_tensor, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Floor.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Floor kernel.
+//
+// inputs  - exactly one tensor (checked with assert)
+// outputs - the result tensor is taken from slot 0
+// `op_index` and `builder` are not read by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleFloor(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *const source = inputs.at(0);
+  Tensor *const result = outputs.at(0);
+
+  return std::make_unique<kernels::Floor>(source, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FloorDiv.h"
+
+namespace luci_interpreter
+{
+
+// Creates the FloorDiv kernel.
+//
+// inputs  - exactly two operand tensors (checked with assert)
+// outputs - the result tensor is taken from slot 0
+// `op_index` and `builder` are not read by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(std::vector<const Tensor *> &&inputs,
+                                                    std::vector<Tensor *> &&outputs,
+                                                    const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *const lhs = inputs.at(0);
+  const Tensor *const rhs = inputs.at(1);
+  Tensor *const result = outputs.at(0);
+
+  return std::make_unique<kernels::FloorDiv>(lhs, rhs, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FullyConnected.h"
+
+namespace luci_interpreter
+{
+
+// Creates the FullyConnected kernel for the operator stored at `op_index`.
+//
+// inputs  - exactly three tensors (checked with assert): data, weights, bias
+// outputs - the result tensor is taken from slot 0
+// Fused activation and keep_num_dims come from the operator's FullyConnectedOptions.
+std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(std::vector<const Tensor *> &&inputs,
+                                                          std::vector<Tensor *> &&outputs,
+                                                          const uint32_t op_index,
+                                                          KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *const data = inputs.at(0);
+  const Tensor *const weights_tensor = inputs.at(1);
+  const Tensor *const bias_tensor = inputs.at(2);
+  Tensor *const result = outputs.at(0);
+
+  // Unpack the operator so that its builtin options can be queried.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *const fc_options = unpacked_op.builtin_options.AsFullyConnectedOptions();
+
+  FullyConnectedParams kernel_params{};
+  kernel_params.activation = luci_actfunc(fc_options->fused_activation_function);
+  kernel_params.keep_num_dims = fc_options->keep_num_dims;
+
+  return std::make_unique<kernels::FullyConnected>(data, weights_tensor, bias_tensor, result,
+                                                   kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Gather.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Gather kernel for the operator stored at `op_index`.
+//
+// inputs  - exactly two tensors (checked with assert): params, then indices
+// outputs - the result tensor is taken from slot 0
+// The axis comes from the operator's GatherOptions; batch_dims is always set to 0.
+std::unique_ptr<Kernel> build_kernel_CircleGather(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *const params_tensor = inputs.at(0);
+  const Tensor *const indices_tensor = inputs.at(1);
+  Tensor *const result = outputs.at(0);
+
+  // Unpack the operator so that its builtin options can be queried.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *const gather_options = unpacked_op.builtin_options.AsGatherOptions();
+
+  GatherParams kernel_params{};
+  kernel_params.axis = gather_options->axis;
+  // TODO support batch_dims
+  kernel_params.batch_dims = 0;
+
+  return std::make_unique<kernels::Gather>(params_tensor, indices_tensor, result, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Greater.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0] (x), inputs[1] (y) and outputs[0] into a kernels::Greater instance.
+// Exactly two inputs are expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleGreater(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *lhs = inputs.at(0);
+  const auto *rhs = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::Greater>(lhs, rhs, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/GreaterEqual.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0] (x), inputs[1] (y) and outputs[0] into a kernels::GreaterEqual instance.
+// Exactly two inputs are expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(std::vector<const Tensor *> &&inputs,
+                                                        std::vector<Tensor *> &&outputs,
+                                                        const uint32_t op_index,
+                                                        KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *lhs = inputs.at(0);
+  const auto *rhs = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::GreaterEqual>(lhs, rhs, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/If.h"
+
+namespace luci_interpreter
+{
+
+// Placeholder builder for the Circle If operator, which is not supported yet.
+//
+// In builds with assertions enabled the assert below fires. When assertions are compiled out
+// (NDEBUG) the function returns a null kernel pointer instead of flowing off the end of a
+// value-returning function, which would be undefined behavior.
+//
+// None of the parameters are used.
+std::unique_ptr<Kernel> build_kernel_CircleIf(std::vector<const Tensor *> &&inputs,
+                                              std::vector<Tensor *> &&outputs,
+                                              const uint32_t op_index, KernelBuilder &builder)
+{
+  // TODO: support IF operation
+  assert(false && "Not supported now");
+
+  // Reached only when the assert is compiled out; report "no kernel" explicitly.
+  return nullptr;
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/InstanceNorm.h"
+
+namespace luci_interpreter
+{
+
+// Creates the InstanceNorm kernel for the Circle operator at `op_index`.
+//
+// inputs:  [0] input, [1] gamma, [2] beta (exactly three are expected).
+// outputs: [0] result tensor.
+// Epsilon and the fused activation are taken from the operator's InstanceNormOptions.
+std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(std::vector<const Tensor *> &&inputs,
+                                                        std::vector<Tensor *> &&outputs,
+                                                        const uint32_t op_index,
+                                                        KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const auto *input_tensor = inputs.at(0);
+  const auto *gamma_tensor = inputs.at(1);
+  const auto *beta_tensor = inputs.at(2);
+  auto *output_tensor = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *norm_options = unpacked_op.builtin_options.AsInstanceNormOptions();
+
+  InstanceNormParams kernel_params{};
+  kernel_params.epsilon = norm_options->epsilon;
+  kernel_params.activation = luci_actfunc(norm_options->fused_activation_function);
+
+  return std::make_unique<kernels::InstanceNorm>(input_tensor, gamma_tensor, beta_tensor,
+                                                 output_tensor, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Normalize.h"
+
+namespace luci_interpreter
+{
+
+// Creates the L2Normalize kernel for the Circle operator at `op_index`.
+//
+// inputs:  [0] input tensor (exactly one is expected).
+// outputs: [0] result tensor.
+// The fused activation is taken from the operator's L2NormOptions.
+std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(std::vector<const Tensor *> &&inputs,
+                                                       std::vector<Tensor *> &&outputs,
+                                                       const uint32_t op_index,
+                                                       KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const auto *input_tensor = inputs.at(0);
+  auto *output_tensor = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *l2norm_options = unpacked_op.builtin_options.AsL2NormOptions();
+
+  L2NormParams kernel_params{};
+  kernel_params.activation = luci_actfunc(l2norm_options->fused_activation_function);
+
+  return std::make_unique<kernels::L2Normalize>(input_tensor, output_tensor, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Pool2D.h"
+
+namespace luci_interpreter
+{
+
+// Creates the L2Pool2D kernel for the Circle operator at `op_index`.
+//
+// inputs:  [0] input tensor (exactly one is expected).
+// outputs: [0] result tensor.
+// Padding, filter size, strides and fused activation are taken from the operator's
+// Pool2DOptions.
+std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(std::vector<const Tensor *> &&inputs,
+                                                    std::vector<Tensor *> &&outputs,
+                                                    const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const auto *input_tensor = inputs.at(0);
+  auto *output_tensor = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *pool_options = unpacked_op.builtin_options.AsPool2DOptions();
+
+  Pool2DParams kernel_params{};
+  kernel_params.padding = luci_padding(pool_options->padding);
+  kernel_params.stride_width = pool_options->stride_w;
+  kernel_params.stride_height = pool_options->stride_h;
+  kernel_params.filter_width = pool_options->filter_width;
+  kernel_params.filter_height = pool_options->filter_height;
+  kernel_params.activation = luci_actfunc(pool_options->fused_activation_function);
+
+  return std::make_unique<kernels::L2Pool2D>(input_tensor, output_tensor, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LeakyRelu.h"
+
+namespace luci_interpreter
+{
+
+// Creates the LeakyRelu kernel for the Circle operator at `op_index`.
+//
+// inputs:  [0] input tensor (exactly one is expected).
+// outputs: [0] result tensor.
+// Alpha is taken from the operator's LeakyReluOptions.
+std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const auto *input_tensor = inputs.at(0);
+  auto *output_tensor = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *relu_options = unpacked_op.builtin_options.AsLeakyReluOptions();
+
+  LeakyReluParams kernel_params{};
+  kernel_params.alpha = relu_options->alpha;
+
+  return std::make_unique<kernels::LeakyRelu>(input_tensor, output_tensor, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Less.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0] (x), inputs[1] (y) and outputs[0] into a kernels::Less instance.
+// Exactly two inputs are expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleLess(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *lhs = inputs.at(0);
+  const auto *rhs = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::Less>(lhs, rhs, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LessEqual.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0] (x), inputs[1] (y) and outputs[0] into a kernels::LessEqual instance.
+// Exactly two inputs are expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleLessEqual(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *lhs = inputs.at(0);
+  const auto *rhs = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::LessEqual>(lhs, rhs, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LocalResponseNormalization.h"
+
+namespace luci_interpreter
+{
+
+// Creates the LocalResponseNormalization kernel for the Circle operator at `op_index`.
+//
+// inputs:  [0] input tensor (exactly one is expected).
+// outputs: [0] result tensor.
+// Radius, bias, alpha and beta are copied from the operator's
+// LocalResponseNormalizationOptions.
+std::unique_ptr<Kernel>
+build_kernel_CircleLocalResponseNormalization(std::vector<const Tensor *> &&inputs,
+                                              std::vector<Tensor *> &&outputs,
+                                              const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const auto *input_tensor = inputs.at(0);
+  auto *output_tensor = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *lrn_options = unpacked_op.builtin_options.AsLocalResponseNormalizationOptions();
+
+  LocalResponseNormalizationParams kernel_params{};
+  kernel_params.alpha = lrn_options->alpha;
+  kernel_params.beta = lrn_options->beta;
+  kernel_params.bias = lrn_options->bias;
+  kernel_params.radius = lrn_options->radius;
+
+  return std::make_unique<kernels::LocalResponseNormalization>(input_tensor, output_tensor,
+                                                               kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogSoftmax.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0] and outputs[0] into a kernels::LogSoftmax instance.
+// Exactly one input is expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(std::vector<const Tensor *> &&inputs,
+                                                      std::vector<Tensor *> &&outputs,
+                                                      const uint32_t op_index,
+                                                      KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const auto *input_tensor = inputs.at(0);
+  auto *output_tensor = outputs.at(0);
+
+  return std::make_unique<kernels::LogSoftmax>(input_tensor, output_tensor);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalAnd.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0], inputs[1] and outputs[0] into a kernels::LogicalAnd instance.
+// Exactly two inputs are expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(std::vector<const Tensor *> &&inputs,
+                                                      std::vector<Tensor *> &&outputs,
+                                                      const uint32_t op_index,
+                                                      KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *first_operand = inputs.at(0);
+  const auto *second_operand = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::LogicalAnd>(first_operand, second_operand, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalNot.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0] and outputs[0] into a kernels::LogicalNot instance.
+// Exactly one input is expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(std::vector<const Tensor *> &&inputs,
+                                                      std::vector<Tensor *> &&outputs,
+                                                      const uint32_t op_index,
+                                                      KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const auto *input_tensor = inputs.at(0);
+  auto *output_tensor = outputs.at(0);
+
+  return std::make_unique<kernels::LogicalNot>(input_tensor, output_tensor);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalOr.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0], inputs[1] and outputs[0] into a kernels::LogicalOr instance.
+// Exactly two inputs are expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *first_operand = inputs.at(0);
+  const auto *second_operand = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::LogicalOr>(first_operand, second_operand, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Logistic.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0] and outputs[0] into a kernels::Logistic instance.
+// Exactly one input is expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleLogistic(std::vector<const Tensor *> &&inputs,
+                                                    std::vector<Tensor *> &&outputs,
+                                                    const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const auto *input_tensor = inputs.at(0);
+  auto *output_tensor = outputs.at(0);
+
+  return std::make_unique<kernels::Logistic>(input_tensor, output_tensor);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MaxPool2D.h"
+
+namespace luci_interpreter
+{
+
+// Creates the MaxPool2D kernel for the Circle operator at `op_index`.
+//
+// inputs:  [0] input tensor (exactly one is expected).
+// outputs: [0] result tensor.
+// Padding, filter size, strides and fused activation are taken from the operator's
+// Pool2DOptions.
+std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const auto *input_tensor = inputs.at(0);
+  auto *output_tensor = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *pool_options = unpacked_op.builtin_options.AsPool2DOptions();
+
+  Pool2DParams kernel_params{};
+  kernel_params.padding = luci_padding(pool_options->padding);
+  kernel_params.stride_width = pool_options->stride_w;
+  kernel_params.stride_height = pool_options->stride_h;
+  kernel_params.filter_width = pool_options->filter_width;
+  kernel_params.filter_height = pool_options->filter_height;
+  kernel_params.activation = luci_actfunc(pool_options->fused_activation_function);
+
+  return std::make_unique<kernels::MaxPool2D>(input_tensor, output_tensor, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Maximum.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0], inputs[1] and outputs[0] into a kernels::Maximum instance.
+// Exactly two inputs are expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleMaximum(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *first_operand = inputs.at(0);
+  const auto *second_operand = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::Maximum>(first_operand, second_operand, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mean.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Mean kernel for the Circle operator at `op_index`.
+//
+// inputs:  [0] input tensor, [1] axis tensor (exactly two are expected).
+// outputs: [0] result tensor.
+// Three extra tensors (temp_index, resolved_axes, temp_sum) are added to the runtime graph, in
+// that order, and handed to the kernel. keep_dims is taken from the operator's ReducerOptions.
+std::unique_ptr<Kernel> build_kernel_CircleMean(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *input_tensor = inputs.at(0);
+  const auto *axis_tensor = inputs.at(1);
+  auto *output_tensor = outputs.at(0);
+
+  // Adds a tensor with an empty shape and a null data buffer to the runtime graph and returns
+  // the pointer handed back by the graph.
+  const auto add_scratch_tensor = [&builder](DataType element_type) -> Tensor * {
+    auto scratch = std::make_unique<Tensor>(element_type, Shape({}), nullptr);
+    scratch->set_data_buffer(nullptr);
+    return builder.get_runtime_graph()->addTensor(std::move(scratch));
+  };
+
+  // Registration order matches the order the kernel constructor receives them.
+  Tensor *temp_index = add_scratch_tensor(DataType::S32);
+  Tensor *resolved_axes = add_scratch_tensor(DataType::S32);
+  Tensor *temp_sum = add_scratch_tensor(input_tensor->element_type());
+
+  // Unpack the operator so its builtin options can be read.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *reducer_options = unpacked_op.builtin_options.AsReducerOptions();
+
+  ReducerParams kernel_params{};
+  kernel_params.keep_dims = reducer_options->keep_dims;
+
+  return std::make_unique<kernels::Mean>(input_tensor, axis_tensor, output_tensor, temp_index,
+                                         resolved_axes, temp_sum, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Minimum.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0], inputs[1] and outputs[0] into a kernels::Minimum instance.
+// Exactly two inputs are expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleMinimum(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *first_operand = inputs.at(0);
+  const auto *second_operand = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::Minimum>(first_operand, second_operand, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MirrorPad.h"
+
+namespace luci_interpreter
+{
+
+// Creates the MirrorPad kernel for the Circle operator at `op_index`.
+//
+// inputs:  [0] input tensor, [1] paddings tensor (exactly two are expected).
+// outputs: [0] result tensor.
+// The pad mode is converted from the operator's MirrorPadOptions via luci_mirrorpad_mode().
+std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *input_tensor = inputs.at(0);
+  const auto *paddings_tensor = inputs.at(1);
+  auto *output_tensor = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *pad_options = unpacked_op.builtin_options.AsMirrorPadOptions();
+
+  MirrorPadParams kernel_params{};
+  kernel_params.mode = luci_mirrorpad_mode(pad_options->mode);
+
+  return std::make_unique<kernels::MirrorPad>(input_tensor, paddings_tensor, output_tensor,
+                                              kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mul.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Mul kernel for the Circle operator at `op_index`.
+//
+// inputs:  [0] and [1] are the two operand tensors (exactly two are expected).
+// outputs: [0] result tensor.
+// The fused activation is taken from the operator's MulOptions.
+std::unique_ptr<Kernel> build_kernel_CircleMul(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *first_operand = inputs.at(0);
+  const auto *second_operand = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *mul_options = unpacked_op.builtin_options.AsMulOptions();
+
+  MulParams kernel_params{};
+  kernel_params.activation = luci_actfunc(mul_options->fused_activation_function);
+
+  return std::make_unique<kernels::Mul>(first_operand, second_operand, result, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Neg.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0] and outputs[0] into a kernels::Neg instance.
+// Exactly one input is expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleNeg(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const auto *input_tensor = inputs.at(0);
+  auto *output_tensor = outputs.at(0);
+
+  return std::make_unique<kernels::Neg>(input_tensor, output_tensor);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/NotEqual.h"
+
+namespace luci_interpreter
+{
+
+// Wires inputs[0] (x), inputs[1] (y) and outputs[0] into a kernels::NotEqual instance.
+// Exactly two inputs are expected; `op_index` and `builder` are not used by this builder.
+std::unique_ptr<Kernel> build_kernel_CircleNotEqual(std::vector<const Tensor *> &&inputs,
+                                                    std::vector<Tensor *> &&outputs,
+                                                    const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const auto *lhs = inputs.at(0);
+  const auto *rhs = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::NotEqual>(lhs, rhs, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/OneHot.h"
+
+namespace luci_interpreter
+{
+
+// Creates the OneHot kernel for the Circle operator at `op_index`.
+//
+// inputs:  [0] indices, [1] depth, [2] on_value, [3] off_value (exactly four are expected).
+// outputs: [0] result tensor.
+// The axis is copied from the operator's OneHotOptions.
+std::unique_ptr<Kernel> build_kernel_CircleOneHot(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 4);
+
+  const auto *indices_tensor = inputs.at(0);
+  const auto *depth_tensor = inputs.at(1);
+  const auto *on_value_tensor = inputs.at(2);
+  const auto *off_value_tensor = inputs.at(3);
+  auto *output_tensor = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *one_hot_options = unpacked_op.builtin_options.AsOneHotOptions();
+
+  OneHotParams kernel_params{};
+  kernel_params.axis = one_hot_options->axis;
+
+  return std::make_unique<kernels::OneHot>(indices_tensor, depth_tensor, on_value_tensor,
+                                           off_value_tensor, output_tensor, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PRelu.h"
+
+namespace luci_interpreter
+{
+
+// Creates the PRelu kernel from inputs[0] (data), inputs[1] (alpha) and
+// outputs[0]. `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CirclePRelu(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  // Data tensor plus alpha tensor.
+  assert(inputs.size() == 2);
+
+  const auto *data = inputs.at(0);
+  const auto *alpha_tensor = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::PRelu>(data, alpha_tensor, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pack.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Pack kernel.
+//
+// inputs  : every entry is forwarded to the kernel, in order
+// outputs : [0] result tensor
+// `axis` and `values_count` are copied from the PackOptions of the operator
+// found at `op_index` in the circle reader owned by `builder`.
+std::unique_ptr<Kernel> build_kernel_CirclePack(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  // Copy all input pointers in one step instead of an index loop.
+  std::vector<const Tensor *> input_tensors(inputs.begin(), inputs.end());
+  Tensor *output = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read as a plain struct.
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsPackOptions();
+  // FlatBuffers union accessors return nullptr when the stored options type
+  // differs; catch that in debug builds before dereferencing.
+  assert(options != nullptr);
+
+  PackParams params{};
+  params.axis = options->axis;
+  params.values_count = options->values_count;
+
+  return std::make_unique<kernels::Pack>(std::move(input_tensors), output, params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pad.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Pad kernel from inputs[0] (data), inputs[1] (paddings) and
+// outputs[0]. `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CirclePad(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  // Data tensor plus paddings tensor.
+  assert(inputs.size() == 2);
+
+  const auto *data = inputs.at(0);
+  const auto *pad_spec = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::Pad>(data, pad_spec, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PadV2.h"
+
+namespace luci_interpreter
+{
+
+// Creates the PadV2 kernel from inputs[0] (data), inputs[1] (paddings),
+// inputs[2] (constant values) and outputs[0]. `op_index` and `builder` are
+// not read by this function.
+std::unique_ptr<Kernel> build_kernel_CirclePadV2(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  // Data, paddings and constant-values tensors.
+  assert(inputs.size() == 3);
+
+  const auto *data = inputs.at(0);
+  const auto *pad_spec = inputs.at(1);
+  const auto *fill_values = inputs.at(2);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::PadV2>(data, pad_spec, fill_values, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pow.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Pow kernel from inputs[0], inputs[1] and outputs[0].
+// `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CirclePow(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  // Exactly two operands are expected.
+  assert(inputs.size() == 2);
+
+  const auto *first_operand = inputs.at(0);
+  const auto *second_operand = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::Pow>(first_operand, second_operand, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Quantize.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Quantize kernel from inputs[0] and outputs[0].
+// `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CircleQuantize(std::vector<const Tensor *> &&inputs,
+                                                    std::vector<Tensor *> &&outputs,
+                                                    const uint32_t op_index, KernelBuilder &builder)
+{
+  // Single-input operator.
+  assert(inputs.size() == 1);
+
+  const auto *source = inputs.at(0);
+  auto *destination = outputs.at(0);
+  return std::make_unique<kernels::Quantize>(source, destination);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Relu kernel from inputs[0] and outputs[0].
+// `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CircleRelu(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  // Single-input operator.
+  assert(inputs.size() == 1);
+
+  const auto *source = inputs.at(0);
+  auto *destination = outputs.at(0);
+  return std::make_unique<kernels::Relu>(source, destination);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu6.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Relu6 kernel from inputs[0] and outputs[0].
+// `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CircleRelu6(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  // Single-input operator.
+  assert(inputs.size() == 1);
+
+  const auto *source = inputs.at(0);
+  auto *destination = outputs.at(0);
+  return std::make_unique<kernels::Relu6>(source, destination);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Reshape.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Reshape kernel from inputs[0] (data), inputs[1] (shape) and
+// outputs[0]. `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CircleReshape(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  // Data tensor plus shape tensor.
+  assert(inputs.size() == 2);
+
+  const auto *data = inputs.at(0);
+  const auto *target_shape = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  // NOTE The operator's 'newShape' attribute is not consulted; only the shape
+  //      tensor is passed to the kernel.
+  return std::make_unique<kernels::Reshape>(data, target_shape, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeBilinear.h"
+
+namespace luci_interpreter
+{
+
+// Creates the ResizeBilinear kernel.
+//
+// inputs  : [0] data, [1] size
+// outputs : [0] result tensor
+// `align_corners` and `half_pixel_centers` are copied from the
+// ResizeBilinearOptions of the operator found at `op_index` in the circle
+// reader owned by `builder`.
+std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(std::vector<const Tensor *> &&inputs,
+                                                          std::vector<Tensor *> &&outputs,
+                                                          const uint32_t op_index,
+                                                          KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *size = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read as a plain struct.
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsResizeBilinearOptions();
+  // FlatBuffers union accessors return nullptr when the stored options type
+  // differs; catch that in debug builds before dereferencing.
+  assert(options != nullptr);
+
+  ResizeBilinearParams params{};
+  params.align_corners = options->align_corners;
+  params.half_pixel_centers = options->half_pixel_centers;
+
+  return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+
+// Creates the ResizeNearestNeighbor kernel.
+//
+// inputs  : [0] data, [1] size
+// outputs : [0] result tensor
+// `align_corners` is copied from the ResizeNearestNeighborOptions of the
+// operator found at `op_index` in the circle reader owned by `builder`;
+// `half_pixel_centers` is always set to false (see the TODO below).
+std::unique_ptr<Kernel>
+build_kernel_CircleResizeNearestNeighbor(std::vector<const Tensor *> &&inputs,
+                                         std::vector<Tensor *> &&outputs, const uint32_t op_index,
+                                         KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *size = inputs.at(1);
+  Tensor *output = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read as a plain struct.
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsResizeNearestNeighborOptions();
+  // FlatBuffers union accessors return nullptr when the stored options type
+  // differs; catch that in debug builds before dereferencing.
+  assert(options != nullptr);
+
+  ResizeNearestNeighborParams params{};
+  params.align_corners = options->align_corners;
+  // TODO Read half_pixel_centers from the options once
+  //      CircleResizeNearestNeighbor provides it. It currently does not, so
+  //      the default value (false) is used here.
+  params.half_pixel_centers = false;
+
+  return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReverseV2.h"
+
+namespace luci_interpreter
+{
+
+// Creates the ReverseV2 kernel from inputs[0] (data), inputs[1] (axis) and
+// outputs[0]. `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CircleReverseV2(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  // Data tensor plus axis tensor.
+  assert(inputs.size() == 2);
+
+  const auto *data = inputs.at(0);
+  const auto *axis_tensor = inputs.at(1);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::ReverseV2>(data, axis_tensor, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Rsqrt.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Rsqrt kernel from inputs[0] and outputs[0].
+// `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CircleRsqrt(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  // Single-input operator.
+  assert(inputs.size() == 1);
+
+  const auto *source = inputs.at(0);
+  auto *destination = outputs.at(0);
+  return std::make_unique<kernels::Rsqrt>(source, destination);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SVDF.h"
+
+namespace luci_interpreter
+{
+
+// Creates the SVDF kernel.
+//
+// inputs  : [0] input, [1] feature, [2] time, [3] bias,
+//           [4] input activation state
+// outputs : [0] result tensor
+//
+// Seven scratchpad tensors with an empty shape and no data buffer are added to
+// the runtime graph and handed to the kernel. Their element types are:
+//   tmp          - same as the input activation state
+//   tmp_1        - S32 when the input is S8, otherwise FLOAT32
+//   tmp_2        - the feature type when tmp_1 is FLOAT32 and the feature is
+//                  S8 or U8, otherwise the same as tmp_1
+//   tmp_3..tmp_6 - FLOAT32
+// Activation, rank and asymmetric_quantize_inputs are copied from the
+// SVDFOptions of the operator found at `op_index` in the circle reader owned
+// by `builder`.
+std::unique_ptr<Kernel> build_kernel_CircleSVDF(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 5);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *feature = inputs.at(1);
+  const Tensor *time = inputs.at(2);
+  const Tensor *bias = inputs.at(3);
+  const Tensor *input_activation_state = inputs.at(4);
+  Tensor *output = outputs.at(0);
+
+  // Registers one scratchpad tensor of the given element type in the runtime
+  // graph and returns the pointer handed back by the graph.
+  const auto add_scratchpad = [&builder](DataType type) -> Tensor * {
+    auto scratchpad_tensor = std::make_unique<Tensor>(type, Shape({}), nullptr);
+    scratchpad_tensor->set_data_buffer(nullptr);
+    return builder.get_runtime_graph()->addTensor(std::move(scratchpad_tensor));
+  };
+
+  // The scratchpads are created in the same order as they are passed to the
+  // kernel so the graph registration order stays unchanged.
+  Tensor *tmp = add_scratchpad(input_activation_state->element_type());
+
+  DataType data_type = input->element_type() == DataType::S8 ? DataType::S32 : DataType::FLOAT32;
+  Tensor *tmp_1 = add_scratchpad(data_type);
+
+  if (data_type == DataType::FLOAT32 &&
+      (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8))
+  {
+    data_type = feature->element_type();
+  }
+  Tensor *tmp_2 = add_scratchpad(data_type);
+
+  Tensor *tmp_3 = add_scratchpad(DataType::FLOAT32);
+  Tensor *tmp_4 = add_scratchpad(DataType::FLOAT32);
+  Tensor *tmp_5 = add_scratchpad(DataType::FLOAT32);
+  Tensor *tmp_6 = add_scratchpad(DataType::FLOAT32);
+
+  // Unpack the operator so its builtin options can be read as a plain struct.
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsSVDFOptions();
+  // FlatBuffers union accessors return nullptr when the stored options type
+  // differs; catch that in debug builds before dereferencing.
+  assert(options != nullptr);
+
+  SVDFParams params{};
+  params.activation = luci_actfunc(options->fused_activation_function);
+  params.svdf_rank = options->rank;
+  params.asymmetric_quantize_inputs = options->asymmetric_quantize_inputs;
+
+  return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output,
+                                         tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Shape.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Shape kernel.
+//
+// inputs  : [0] data
+// outputs : [0] result tensor
+// `out_type` is converted with luci_datatype() from the ShapeOptions of the
+// operator found at `op_index` in the circle reader owned by `builder`.
+std::unique_ptr<Kernel> build_kernel_CircleShape(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read as a plain struct.
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsShapeOptions();
+  // FlatBuffers union accessors return nullptr when the stored options type
+  // differs; catch that in debug builds before dereferencing.
+  assert(options != nullptr);
+
+  ShapeParams shape_params{};
+  shape_params.out_type = luci_datatype(options->out_type);
+
+  return std::make_unique<kernels::ShapeKernel>(input, output, shape_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Slice.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Slice kernel from inputs[0] (data), inputs[1] (begin),
+// inputs[2] (size) and outputs[0]. `op_index` and `builder` are not read by
+// this function.
+std::unique_ptr<Kernel> build_kernel_CircleSlice(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  // Data, begin and size tensors.
+  assert(inputs.size() == 3);
+
+  const auto *data = inputs.at(0);
+  const auto *begin_tensor = inputs.at(1);
+  const auto *size_tensor = inputs.at(2);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::Slice>(data, begin_tensor, size_tensor, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Softmax.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Softmax kernel.
+//
+// inputs  : [0] data
+// outputs : [0] result tensor
+// `beta` is copied from the SoftmaxOptions of the operator found at
+// `op_index` in the circle reader owned by `builder`.
+std::unique_ptr<Kernel> build_kernel_CircleSoftmax(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read as a plain struct.
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsSoftmaxOptions();
+  // FlatBuffers union accessors return nullptr when the stored options type
+  // differs; catch that in debug builds before dereferencing.
+  assert(options != nullptr);
+
+  SoftmaxParams params{};
+  params.beta = options->beta;
+
+  return std::make_unique<kernels::Softmax>(input, output, params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+
+// Creates the SpaceToBatchND kernel from inputs[0] (data), inputs[1] (block
+// shape), inputs[2] (paddings) and outputs[0]. `op_index` and `builder` are
+// not read by this function.
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(std::vector<const Tensor *> &&inputs,
+                                                          std::vector<Tensor *> &&outputs,
+                                                          const uint32_t op_index,
+                                                          KernelBuilder &builder)
+{
+  // Data, block-shape and paddings tensors.
+  assert(inputs.size() == 3);
+
+  const auto *data = inputs.at(0);
+  const auto *block_dims = inputs.at(1);
+  const auto *pad_spec = inputs.at(2);
+  auto *result = outputs.at(0);
+
+  return std::make_unique<kernels::SpaceToBatchND>(data, block_dims, pad_spec, result);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToDepth.h"
+
+namespace luci_interpreter
+{
+
+// Creates the SpaceToDepth kernel.
+//
+// inputs  : [0] data
+// outputs : [0] result tensor
+// `block_size` is copied from the SpaceToDepthOptions of the operator found
+// at `op_index` in the circle reader owned by `builder`.
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(std::vector<const Tensor *> &&inputs,
+                                                        std::vector<Tensor *> &&outputs,
+                                                        const uint32_t op_index,
+                                                        KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+  const Tensor *input = inputs.at(0);
+  Tensor *output = outputs.at(0);
+
+  // Unpack the operator so its builtin options can be read as a plain struct.
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsSpaceToDepthOptions();
+  // FlatBuffers union accessors return nullptr when the stored options type
+  // differs; catch that in debug builds before dereferencing.
+  assert(options != nullptr);
+
+  SpaceToDepthParams params{};
+  params.block_size = options->block_size;
+
+  return std::make_unique<kernels::SpaceToDepth>(input, output, params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Split.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Split kernel.
+//
+// inputs  : [0] axis, [1] data
+// outputs : every entry is forwarded to the kernel, in order
+// `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CircleSplit(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *axis = inputs.at(0);
+  const Tensor *input = inputs.at(1);
+
+  // `outputs` holds plain Tensor pointers (not pairs), so the elements are
+  // copied as they are.
+  std::vector<Tensor *> output_tensors(outputs.begin(), outputs.end());
+
+  // NOTE 'num_splits' attribute is ignored.
+  return std::make_unique<kernels::Split>(axis, input, std::move(output_tensors));
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SplitV.h"
+
+namespace luci_interpreter
+{
+
+// Creates the SplitV kernel.
+//
+// inputs  : [0] data, [1] split sizes, [2] axis
+// outputs : every entry is forwarded to the kernel, in order
+// `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CircleSplitV(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 3);
+
+  const Tensor *input = inputs.at(0);
+  const Tensor *sizes_data = inputs.at(1);
+  const Tensor *axis = inputs.at(2);
+
+  // `outputs` holds plain Tensor pointers (not pairs), so the elements are
+  // copied as they are.
+  std::vector<Tensor *> output_tensors(outputs.begin(), outputs.end());
+
+  // NOTE 'num_splits' attribute is ignored.
+  return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(output_tensors));
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sqrt.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Sqrt kernel from inputs[0] and outputs[0].
+// `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CircleSqrt(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  // Single-input operator.
+  assert(inputs.size() == 1);
+
+  const auto *source = inputs.at(0);
+  auto *destination = outputs.at(0);
+  return std::make_unique<kernels::Sqrt>(source, destination);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Square.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Square kernel from inputs[0] and outputs[0].
+// `op_index` and `builder` are not read by this function.
+std::unique_ptr<Kernel> build_kernel_CircleSquare(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  // Single-input operator.
+  assert(inputs.size() == 1);
+
+  const auto *source = inputs.at(0);
+  auto *destination = outputs.at(0);
+  return std::make_unique<kernels::Square>(source, destination);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SquaredDifference.h"
+
+namespace luci_interpreter
+{
+
+// Creates the SquaredDifference kernel for one operator.
+// Expects exactly two input tensors (left and right operand) and writes to the first output.
+// op_index and builder belong to the common builder signature and are not read here.
+std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(std::vector<const Tensor *> &&inputs,
+                                                             std::vector<Tensor *> &&outputs,
+                                                             const uint32_t op_index,
+                                                             KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *lhs = inputs.at(0);
+  const Tensor *rhs = inputs.at(1);
+  Tensor *dst = outputs.at(0);
+  return std::make_unique<kernels::SquaredDifference>(lhs, rhs, dst);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Squeeze.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Squeeze kernel for the operator at op_index.
+// Expects exactly one input tensor; squeeze_dims is copied from the operator's SqueezeOptions.
+std::unique_ptr<Kernel> build_kernel_CircleSqueeze(std::vector<const Tensor *> &&inputs,
+                                                   std::vector<Tensor *> &&outputs,
+                                                   const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *src = inputs.at(0);
+  Tensor *dst = outputs.at(0);
+
+  // Unpack the operator into its object form to reach the builtin options.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *squeeze_options = unpacked_op.builtin_options.AsSqueezeOptions();
+
+  SqueezeParams kernel_params{};
+  kernel_params.squeeze_dims = squeeze_options->squeeze_dims;
+
+  return std::make_unique<kernels::Squeeze>(src, dst, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/StridedSlice.h"
+
+namespace luci_interpreter
+{
+
+// Creates the StridedSlice kernel for the operator at op_index.
+// Expects exactly four input tensors, in order: input, begin, end, strides.
+// The five mask fields are copied verbatim from the operator's StridedSliceOptions.
+std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(std::vector<const Tensor *> &&inputs,
+                                                        std::vector<Tensor *> &&outputs,
+                                                        const uint32_t op_index,
+                                                        KernelBuilder &builder)
+{
+  assert(inputs.size() == 4);
+
+  const Tensor *src = inputs.at(0);
+  const Tensor *begin_tensor = inputs.at(1);
+  const Tensor *end_tensor = inputs.at(2);
+  const Tensor *strides_tensor = inputs.at(3);
+  Tensor *dst = outputs.at(0);
+
+  // Unpack the operator into its object form to reach the builtin options.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *slice_options = unpacked_op.builtin_options.AsStridedSliceOptions();
+
+  StridedSliceParams kernel_params{};
+  kernel_params.begin_mask = slice_options->begin_mask;
+  kernel_params.end_mask = slice_options->end_mask;
+  kernel_params.ellipsis_mask = slice_options->ellipsis_mask;
+  kernel_params.new_axis_mask = slice_options->new_axis_mask;
+  kernel_params.shrink_axis_mask = slice_options->shrink_axis_mask;
+
+  return std::make_unique<kernels::StridedSlice>(src, begin_tensor, end_tensor, strides_tensor,
+                                                 dst, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sub.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Sub kernel for the operator at op_index.
+// Expects exactly two input tensors; the fused activation is taken from the operator's
+// SubOptions and converted with luci_actfunc.
+std::unique_ptr<Kernel> build_kernel_CircleSub(std::vector<const Tensor *> &&inputs,
+                                               std::vector<Tensor *> &&outputs,
+                                               const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *lhs = inputs.at(0);
+  const Tensor *rhs = inputs.at(1);
+  Tensor *dst = outputs.at(0);
+
+  // Unpack the operator into its object form to reach the builtin options.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *sub_options = unpacked_op.builtin_options.AsSubOptions();
+
+  SubParams kernel_params{};
+  kernel_params.activation = luci_actfunc(sub_options->fused_activation_function);
+
+  return std::make_unique<kernels::Sub>(lhs, rhs, dst, kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Tanh.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Tanh kernel for one operator.
+// Expects exactly one input tensor; the first output tensor receives the result.
+// op_index and builder belong to the common builder signature and are not read here.
+std::unique_ptr<Kernel> build_kernel_CircleTanh(std::vector<const Tensor *> &&inputs,
+                                                std::vector<Tensor *> &&outputs,
+                                                const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *src = inputs.at(0);
+  Tensor *dst = outputs.at(0);
+  return std::make_unique<kernels::Tanh>(src, dst);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Transpose.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Transpose kernel for one operator.
+// Expects exactly two input tensors, in order: the data tensor and the permutation tensor.
+// op_index and builder belong to the common builder signature and are not read here.
+std::unique_ptr<Kernel> build_kernel_CircleTranspose(std::vector<const Tensor *> &&inputs,
+                                                     std::vector<Tensor *> &&outputs,
+                                                     const uint32_t op_index,
+                                                     KernelBuilder &builder)
+{
+  assert(inputs.size() == 2);
+
+  const Tensor *src = inputs.at(0);
+  const Tensor *permutation = inputs.at(1);
+  Tensor *dst = outputs.at(0);
+  return std::make_unique<kernels::Transpose>(src, permutation, dst);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/TransposeConv.h"
+
+namespace luci_interpreter
+{
+
+// Creates the TransposeConv kernel for the operator at op_index.
+//
+// Expects exactly four input tensors, in order: input_sizes (requested output shape),
+// filter, out_backprop (the data input) and bias. In addition to the regular output, a
+// shapeless scratch tensor without a data buffer is registered in the runtime graph and
+// handed to the kernel.
+//
+// Padding and strides are read from the operator's TransposeConvOptions.
+std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(std::vector<const Tensor *> &&inputs,
+                                                         std::vector<Tensor *> &&outputs,
+                                                         const uint32_t op_index,
+                                                         KernelBuilder &builder)
+{
+  assert(inputs.size() == 4);
+
+  const Tensor *input_sizes = inputs.at(0);
+  const Tensor *filter = inputs.at(1);
+  const Tensor *out_backprop = inputs.at(2);
+  const Tensor *bias = inputs.at(3);
+  Tensor *output = outputs.at(0);
+
+  // The scratch tensor uses S64 for S16 data and S32 otherwise. The decision must be keyed
+  // off the data input: input_sizes only carries the output shape, so its element type says
+  // nothing about the precision of the convolution.
+  // NOTE(review): confirm against kernels::TransposeConv that out_backprop is the tensor
+  // whose type selects the S16 path.
+  const DataType scratch_data_type =
+    out_backprop->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+
+  auto scratch_tensor = std::make_unique<Tensor>(scratch_data_type, Shape({}), nullptr);
+  scratch_tensor->set_data_buffer(nullptr);
+  Tensor *tmp = builder.get_runtime_graph()->addTensor(std::move(scratch_tensor));
+
+  // Unpack the operator into its object form to reach the builtin options.
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsTransposeConvOptions();
+
+  TransposeConvParams params{};
+  params.padding = luci_padding(options->padding);
+  params.stride_height = options->stride_h;
+  params.stride_width = options->stride_w;
+
+  return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
+                                                  tmp, params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/UnidirectionalSequenceLSTM.h"
+
+namespace luci_interpreter
+{
+
+// Creates the UnidirectionalSequenceLSTM kernel for the operator at op_index.
+//
+// Expects exactly 24 input tensors in the fixed order unpacked below. Inputs 18 and 19
+// (output state and cell state) are treated as mutable and appended to `outputs`.
+//
+// Layout of `outputs` handed to the kernel:
+//   [0]            regular output
+//   [1 .. k]       scratch tensors (3 for non-S8 input, 5 for S8 input without layer norm)
+//   [k + 1, k + 2] output state, cell state
+std::unique_ptr<Kernel>
+build_kernel_CircleUnidirectionalSequenceLSTM(std::vector<const Tensor *> &&inputs,
+                                              std::vector<Tensor *> &&outputs,
+                                              const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 24);
+
+  const Tensor *input = inputs.at(0);
+
+  // Input-to-gate weights.
+  const Tensor *input_to_input_weights = inputs.at(1);
+  const Tensor *input_to_forget_weights = inputs.at(2);
+  const Tensor *input_to_cell_weights = inputs.at(3);
+  const Tensor *input_to_output_weights = inputs.at(4);
+
+  // Recurrent-to-gate weights.
+  const Tensor *recurrent_to_input_weights = inputs.at(5);
+  const Tensor *recurrent_to_forget_weights = inputs.at(6);
+  const Tensor *recurrent_to_cell_weights = inputs.at(7);
+  const Tensor *recurrent_to_output_weights = inputs.at(8);
+
+  // Cell-to-gate weights.
+  const Tensor *cell_to_input_weights = inputs.at(9);
+  const Tensor *cell_to_forget_weights = inputs.at(10);
+  const Tensor *cell_to_output_weights = inputs.at(11);
+
+  // Gate biases.
+  const Tensor *input_gate_bias = inputs.at(12);
+  const Tensor *forget_gate_bias = inputs.at(13);
+  const Tensor *cell_gate_bias = inputs.at(14);
+  const Tensor *output_gate_bias = inputs.at(15);
+
+  // Projection.
+  const Tensor *projection_weights = inputs.at(16);
+  const Tensor *projection_bias = inputs.at(17);
+
+  // The state tensors arrive as inputs but are handed to the kernel as mutable tensors.
+  Tensor *output_state = const_cast<Tensor *>(inputs.at(18));
+  Tensor *cell_state = const_cast<Tensor *>(inputs.at(19));
+
+  // Layer-normalization coefficients.
+  const Tensor *input_layer_norm_coefficients = inputs.at(20);
+  const Tensor *forget_layer_norm_coefficients = inputs.at(21);
+  const Tensor *cell_layer_norm_coefficients = inputs.at(22);
+  const Tensor *output_layer_norm_coefficients = inputs.at(23);
+
+  Tensor *output = outputs.at(0);
+
+  // Unpack the operator into its object form to reach the builtin options.
+  circle::OperatorT oper_t;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&oper_t);
+  const auto *options = oper_t.builtin_options.AsUnidirectionalSequenceLSTMOptions();
+
+  UnidirectionalSequenceLSTMParams params{};
+  params.activation = luci_actfunc(options->fused_activation_function);
+  params.cell_clip = options->cell_clip;
+  params.proj_clip = options->proj_clip;
+  params.time_major = options->time_major;
+  params.asymmetric_quantize_inputs = options->asymmetric_quantize_inputs;
+
+  // Registers a shapeless scratch tensor (no quantization, no data buffer) of the given type
+  // in the runtime graph and appends it to `outputs`.
+  auto append_scratch = [&](DataType scratch_type) {
+    auto scratch = std::make_unique<Tensor>(scratch_type, Shape({}), nullptr);
+    scratch->set_data_buffer(nullptr);
+    outputs.push_back(builder.get_runtime_graph()->addTensor(std::move(scratch)));
+  };
+
+  const bool is_integer = input->element_type() == DataType::S8;
+  const bool use_layer_norm = (forget_layer_norm_coefficients != nullptr);
+
+  if (not is_integer)
+  {
+    // NOTE provide more scratch pads if support hybrid or integer
+    append_scratch(output_state->element_type());
+    append_scratch(cell_state->element_type());
+    append_scratch(input->element_type());
+  }
+  else if (not use_layer_norm)
+  {
+    params.intermediate_affine_quant =
+      builder.get_runtime_graph()->getIntermediateAffineQuantizations();
+
+    // For integer LSTM need 4 16-bit buffer with size n_batch * n_cell
+    // and 1 8-bit buffer with size n_batch * n_cell
+    append_scratch(DataType::S16);
+    append_scratch(DataType::S16);
+    append_scratch(DataType::S16);
+    append_scratch(DataType::S16);
+
+    // The 8-bit scratch tensor carries the first intermediate quantization.
+    auto quantized_scratch = std::make_unique<Tensor>(
+      DataType::S8, Shape({}),
+      builder.get_runtime_graph()->getIntermediateAffineQuantizations()[0]);
+    quantized_scratch->set_data_buffer(nullptr);
+    outputs.push_back(builder.get_runtime_graph()->addTensor(std::move(quantized_scratch)));
+  }
+  else
+  {
+    // TODO: support integer LSTM with layer normalization
+    assert(false && "Not supported now");
+  }
+
+  // The states always come last, after the scratch tensors.
+  outputs.push_back(output_state);
+  outputs.push_back(cell_state);
+
+  return std::make_unique<kernels::UnidirectionalSequenceLSTM>(
+    input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights,
+    input_to_output_weights, recurrent_to_input_weights, recurrent_to_forget_weights,
+    recurrent_to_cell_weights, recurrent_to_output_weights, cell_to_input_weights,
+    cell_to_forget_weights, cell_to_output_weights, input_gate_bias, forget_gate_bias,
+    cell_gate_bias, output_gate_bias, projection_weights, projection_bias,
+    input_layer_norm_coefficients, forget_layer_norm_coefficients, cell_layer_norm_coefficients,
+    output_layer_norm_coefficients, std::move(outputs), params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Unpack.h"
+
+namespace luci_interpreter
+{
+
+// Creates the Unpack kernel for the operator at op_index.
+// Expects exactly one input tensor; every entry of `outputs` becomes a kernel output.
+// The axis is read from the operator's UnpackOptions.
+std::unique_ptr<Kernel> build_kernel_CircleUnpack(std::vector<const Tensor *> &&inputs,
+                                                  std::vector<Tensor *> &&outputs,
+                                                  const uint32_t op_index, KernelBuilder &builder)
+{
+  assert(inputs.size() == 1);
+
+  const Tensor *src = inputs.at(0);
+
+  // Copy all output tensor pointers into a vector that is moved into the kernel.
+  std::vector<Tensor *> unpacked_outputs(outputs.begin(), outputs.end());
+
+  // Unpack the operator into its object form to reach the builtin options.
+  circle::OperatorT unpacked_op;
+  builder.get_circle_reader()->operators()[op_index]->UnPackTo(&unpacked_op);
+  const auto *unpack_options = unpacked_op.builtin_options.AsUnpackOptions();
+
+  UnpackParams kernel_params{};
+  kernel_params.axis = unpack_options->axis;
+
+  // NOTE 'num' attribute is ignored.
+  return std::make_unique<kernels::Unpack>(src, std::move(unpacked_outputs), kernel_params);
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/While.h"
+
+namespace luci_interpreter
+{
+
+// Placeholder builder for the While operator, which is not supported yet.
+//
+// Debug builds abort on the assertion. Builds with NDEBUG return a null kernel pointer
+// instead of flowing off the end of a value-returning function (undefined behaviour).
+// None of the parameters are read.
+std::unique_ptr<Kernel> build_kernel_CircleWhile(std::vector<const Tensor *> &&inputs,
+                                                 std::vector<Tensor *> &&outputs,
+                                                 const uint32_t op_index, KernelBuilder &builder)
+{
+  // TODO: support WHILE operation
+  assert(false && "Not supported now");
+  return nullptr;
+}
+
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if 0
+
+#include "BuddyMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+// Sets up the manager on top of the caller-provided memory region.
+// The usable size is rounded down to the largest power of two not exceeding memSize, and
+// the whole region starts out as a single free block whose header sits at memory_start.
+BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize)
+{
+  const int32_t order = lowerLog2(memSize);
+
+  // We assume that the requested size of memory does not exceed 4 GB
+  assert(order < 32);
+  memSize = 1 << order;
+
+  // Start with every per-order free list empty.
+  for (auto &list_head : _free_blocks)
+    list_head = nullptr;
+
+  // The block header lives in the managed memory itself; `size` excludes the header.
+  _start_block = reinterpret_cast<Block *>(memory_start);
+  _start_block->size = memSize - sizeof(Block);
+  _start_block->is_free = true;
+  _start_block->self = _start_block;
+
+  _num_blocks = 0;
+  _size = _start_block->size;
+
+  addToBlocks(_start_block, order);
+}
+
+// Carves a block out of the managed region for `tensor` and stores the payload address in
+// the tensor via set_data_buffer(). The payload starts right after the Block header.
+// Asserts if no free block of a sufficient order exists.
+void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ const size_t element_size = getDataTypeSize(tensor.element_type());
+ const int32_t num_elements = tensor.shape().num_elements();
+ auto size = num_elements * element_size;
+ // Every block carries its own header, so the header counts towards the footprint.
+ auto footprint = size + sizeof(Block);
+ // l = ceil(log2(footprint)): the smallest order whose block can hold the footprint.
+ auto l = (footprint & (footprint - 1)) == 0
+ ? lowerLog2(footprint)
+ : lowerLog2(footprint) + 1; // check footprint is pow_of_2
+
+ // Walk up to the first order that has a free block available.
+ while (l < 32 && !_free_blocks[l])
+ l++;
+
+ assert(l < 32);
+
+ Block *tmp;
+ tmp = _free_blocks[l];
+ removeFromBlocks(tmp, l);
+
+ // Keep halving the block while one half is still large enough for payload plus header.
+ // divideBlock() keeps the lower half in tmp and puts the upper half on the free list.
+ while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block))
+ {
+ divideBlock(tmp, l);
+ l--;
+ }
+
+ tmp->is_free = false;
+ tmp->self = tmp;
+ _num_blocks++;
+
+ // The payload begins immediately after the header.
+ auto *data = (uint8_t *)(tmp + 1);
+ tensor.set_data_buffer(data);
+}
+
+// Returns the block backing `tensor` to the manager, merges it with free buddies as far as
+// possible, and clears the tensor's data buffer pointer.
+void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+  // The block header sits directly in front of the payload.
+  auto payload = tensor.data<void>();
+  auto *block = (Block *)((uint8_t *)payload - sizeof(Block));
+
+  // Sanity check: the header must point back to itself.
+  assert(block->self == block);
+
+  block->is_free = true;
+  addToBlocks(block, lowerLog2(block->size + sizeof(Block)));
+
+  // Merge upwards until the block spans the whole region or mergeBlock() reports
+  // (by returning nullptr) that the buddy cannot be merged.
+  while (block != nullptr && block->size != _size)
+  {
+    block = mergeBlock(block);
+  }
+
+  _num_blocks--;
+  tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
+
+#endif
--- /dev/null
+/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if 0
+
+#include "MemoryManager.h"
+
+#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+namespace luci_interpreter
+{
+
+// Memory manager that hands out tensor buffers from one caller-provided memory region.
+// Blocks are split in halves on allocation and merged with their buddy on release.
+// Block headers are stored inside the managed region, directly in front of each payload.
+class BuddyMemoryManager : public IMemoryManager
+{
+public:
+ // memory_start: beginning of the managed region; memSize: its size in bytes.
+ BuddyMemoryManager(uint8_t *memory_start, int32_t memSize);
+
+ // Assigns a data buffer from the managed region to the tensor.
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ // Gives the tensor's buffer back to the manager and resets the tensor's data pointer.
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ // Header stored in front of every block's payload.
+ struct Block
+ {
+ // Next block in the free list of the same order.
+ Block *next_free;
+ bool is_free;
+ // Payload size in bytes, i.e. block size without this header.
+ uint32_t size;
+ // debug field
+ Block *self;
+ };
+
+ // Header of the block at the very beginning of the managed region.
+ Block *_start_block;
+ // Number of blocks currently handed out.
+ int32_t _num_blocks;
+ // Payload size of the single block that spans the whole region.
+ uint32_t _size;
+ // Heads of the free lists, indexed by log2 of the block size including the header.
+ Block *_free_blocks[32]{};
+
+ // Returns floor(log2(val)); yields 0 for val == 0 and val == 1.
+ static int32_t lowerLog2(uint32_t val)
+ {
+ int32_t i = 0;
+ while (val >>= 1)
+ i++;
+
+ return i;
+ }
+
+ // Pushes block onto the head of free list l. A null block is ignored.
+ void addToBlocks(Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ block->next_free = _free_blocks[l];
+ _free_blocks[l] = block;
+ }
+
+ // Unlinks block from free list l. Does nothing if block is null or not in that list.
+ void removeFromBlocks(const Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ Block *tmp = _free_blocks[l];
+
+ // Fast path: block is the list head.
+ if (block == tmp)
+ {
+ _free_blocks[l] = block->next_free;
+ return;
+ }
+
+ // Otherwise look for the predecessor and bypass block.
+ while (tmp)
+ {
+ if (tmp->next_free == block)
+ {
+ tmp->next_free = block->next_free;
+ return;
+ }
+
+ tmp = tmp->next_free;
+ }
+ }
+
+ // Splits a block of order l into two halves of order l - 1.
+ // The lower half stays in `block` (not put on any free list); the upper half becomes
+ // the buddy and is added to free list l - 1.
+ void divideBlock(Block *block, int32_t l)
+ {
+ // Payload size of each half: half of the full block minus one header.
+ int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block);
+
+ removeFromBlocks(block, l);
+
+ // there is no need to add to the free_blocks list here
+ block->is_free = true;
+ block->size = size;
+ block->self = block;
+
+ // The buddy header starts right after the lower half's payload.
+ Block *buddy;
+ buddy = (Block *)((uint8_t *)block + sizeof(Block) + size);
+ buddy->is_free = true;
+ buddy->size = size;
+ buddy->self = buddy;
+
+ addToBlocks(buddy, l - 1);
+ }
+
+ // Tries to merge block with its buddy of the same order.
+ // Returns the merged block (added to free list l + 1), or nullptr when the buddy is in
+ // use or has a different size.
+ Block *mergeBlock(Block *block)
+ {
+ Block *buddy;
+
+ const int32_t l = lowerLog2(block->size + sizeof(Block));
+
+ // The buddy's offset from the region start differs from block's offset only in bit l.
+ const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block);
+ buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block);
+
+ if (!buddy->is_free || buddy->size != block->size)
+ return nullptr;
+
+ // Make `block` the lower-addressed of the two; its header survives the merge.
+ if (block > buddy)
+ {
+ Block *x = block;
+ block = buddy;
+ buddy = x;
+ }
+
+ removeFromBlocks(block, l);
+ removeFromBlocks(buddy, l);
+
+ // Two payloads plus the buddy's now-unused header.
+ block->size = block->size * 2 + sizeof(Block);
+ block->is_free = true;
+ block->self = block;
+
+ addToBlocks(block, l + 1);
+
+ return block;
+ }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if 0
+
+#include "BuddyMemoryManager.h"
+#include <gtest/gtest.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+// Allocates two U8 tensors from one buddy-managed pool, checks that data written to each
+// tensor reads back unchanged, and verifies that releasing clears the data pointers.
+TEST(BuddyMemoryManager, basic)
+{
+  auto pool = std::make_unique<uint8_t[]>(200);
+  auto manager = std::make_unique<BuddyMemoryManager>(pool.get(), 130);
+
+  // First tensor: 8 bytes, round-trip check.
+  Tensor tensor_a(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor");
+  manager->allocate_memory(tensor_a);
+
+  uint8_t written_a[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  tensor_a.writeData(written_a, 8);
+
+  uint8_t read_a[8];
+  tensor_a.readData(read_a, 8);
+  for (int idx = 0; idx < 8; idx++)
+  {
+    EXPECT_EQ(written_a[idx], read_a[idx]);
+  }
+
+  // Second tensor: 2x5 bytes, allocated while the first one is still alive.
+  Tensor tensor_b(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor");
+  manager->allocate_memory(tensor_b);
+
+  uint8_t written_b[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}};
+  tensor_b.writeData(written_b, 10);
+
+  uint8_t read_b[2][5];
+  tensor_b.readData(read_b, 10);
+  for (int row = 0; row < 2; row++)
+  {
+    for (int col = 0; col < 5; col++)
+    {
+      EXPECT_EQ(written_b[row][col], read_b[row][col]);
+    }
+  }
+
+  // Releasing must reset each tensor's data pointer.
+  manager->release_memory(tensor_a);
+  EXPECT_EQ(tensor_a.data<void>(), nullptr);
+
+  manager->release_memory(tensor_b);
+  EXPECT_EQ(tensor_b.data<void>(), nullptr);
+}
+
+} // namespace
+} // namespace luci_interpreter
+
+#endif
--- /dev/null
+# Sources of the memory manager static library.
+set(SOURCES
+    SimpleMemoryManager.h SimpleMemoryManager.cpp
+    TestMemoryManager.h TestMemoryManager.cpp
+    BuddyMemoryManager.h BuddyMemoryManager.cpp
+    StaticMemoryManager.h StaticMemoryManager.cpp)
+
+add_library(${LUCI_INTERPRETER_MEMORY_MANAGER} STATIC ${SOURCES})
+target_include_directories(${LUCI_INTERPRETER_MEMORY_MANAGER}
+                           PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_MEMORY_MANAGER}
+                      PUBLIC "luci_micro_circle_reader${READER_SUFFIX}"
+                             luci_micro_circle_schema)
+
+# Everything below only concerns the unit test target.
+if(NOT ENABLE_TEST)
+  return()
+endif()
+
+set(TEST_SOURCES BuddyMemoryManager.test.cpp)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(buddy_manager_test_micro ${TEST_SOURCES})
+target_link_libraries(buddy_manager_test_micro ${LUCI_INTERPRETER_BINARY})
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef USE_STATIC_ALLOC
+
+#include "SimpleMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+// Allocates a heap buffer large enough for all elements of `tensor`.
+// The size is element size times element count and is asserted to be non-zero.
+// The returned buffer was created with new[]; hand it to release_memory() to free it.
+uint8_t *SimpleMemoryManager::allocate_memory(const circle::Tensor *tensor)
+{
+  const auto bytes_per_element = getDataTypeSize(Tensor::element_type(tensor));
+  const auto element_count = Tensor::num_elements(tensor);
+
+  assert(bytes_per_element * element_count > 0);
+
+  return new uint8_t[element_count * bytes_per_element];
+}
+
+// Frees a buffer with delete[]. Passing nullptr is allowed and does nothing.
+void SimpleMemoryManager::release_memory(uint8_t *data)
+{
+  // delete[] on a null pointer is a no-op, so no explicit null check is needed.
+  delete[] data;
+}
+
+} // namespace luci_interpreter
+
+#endif // USE_STATIC_ALLOC
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef USE_STATIC_ALLOC
+#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+// Memory manager that serves every request with an individual heap allocation.
+// Only compiled when USE_STATIC_ALLOC is not defined.
+class SimpleMemoryManager
+{
+public:
+ // Returns a newly allocated buffer sized for all elements of the given circle tensor.
+ uint8_t *allocate_memory(const circle::Tensor *tensor);
+ // Frees a buffer with delete[]; a null pointer is ignored.
+ void release_memory(uint8_t *data);
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#endif // USE_STATIC_ALLOC
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef USE_STATIC_ALLOC
+
+#include "StaticMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+// Returns the address `offset` bytes into the computing buffer.
+// The buffer must have been allocated before (asserted).
+uint8_t *StaticMemoryManager::allocate_memory(int32_t offset)
+{
+  assert(_buffer_ptr != nullptr);
+  uint8_t *const base = _buffer_ptr;
+  return base + offset;
+}
+
+// Returns the address `offset` bytes into the input buffer.
+// The buffer must have been allocated before (asserted).
+uint8_t *StaticMemoryManager::allocate_memory_for_input(int32_t offset)
+{
+  assert(_input_buffer_ptr != nullptr);
+  uint8_t *const base = _input_buffer_ptr;
+  return base + offset;
+}
+
+// Returns the address `offset` bytes into the output buffer.
+// The buffer must have been allocated before (asserted).
+uint8_t *StaticMemoryManager::allocate_memory_for_output(int32_t offset)
+{
+  assert(_output_buffer_ptr != nullptr);
+  uint8_t *const base = _output_buffer_ptr;
+  return base + offset;
+}
+
+// Allocates the input buffer of _input_req_size bytes unless it already exists.
+void StaticMemoryManager::allocate_input_buf()
+{
+  assert(_input_req_size > 0);
+  if (_input_buffer_ptr != nullptr)
+    return; // already allocated, keep the existing buffer
+
+  _input_buffer_ptr = new uint8_t[_input_req_size];
+}
+
+// Allocates the output buffer of _output_req_size bytes unless it already exists.
+void StaticMemoryManager::allocate_output_buf()
+{
+  assert(_output_req_size > 0);
+  if (_output_buffer_ptr != nullptr)
+    return; // already allocated, keep the existing buffer
+
+  _output_buffer_ptr = new uint8_t[_output_req_size];
+}
+
+// Allocates the computing buffer of _buffer_req_size bytes unless it already exists.
+void StaticMemoryManager::allocate_computing_buf()
+{
+  assert(_buffer_req_size > 0);
+  if (_buffer_ptr != nullptr)
+    return; // already allocated, keep the existing buffer
+
+  _buffer_ptr = new uint8_t[_buffer_req_size];
+}
+
+// Frees the computing buffer and resets its pointer so it can be allocated again.
+void StaticMemoryManager::release_computing_buf()
+{
+  delete[] _buffer_ptr;
+  _buffer_ptr = nullptr;
+}
+
+// Frees the input buffer and resets its pointer so it can be allocated again.
+void StaticMemoryManager::release_input_buf()
+{
+  delete[] _input_buffer_ptr;
+  _input_buffer_ptr = nullptr;
+}
+
+// Frees the output buffer and resets its pointer so it can be allocated again.
+void StaticMemoryManager::release_output_buf()
+{
+  delete[] _output_buffer_ptr;
+  _output_buffer_ptr = nullptr;
+}
+
+} // namespace luci_interpreter
+
+#endif // USE_STATIC_ALLOC
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef USE_STATIC_ALLOC
+
+#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+// Used for allocations in static buffers, using offsets defined in luci model.
+//
+// The manager keeps three separately allocated byte buffers: one for inputs, one for
+// intermediate computations and one for outputs.
+//
+// NOTE(review): the class declares no destructor and no copy control. A buffer created by
+// allocate_*_buf() is only freed by the matching release_*_buf() call, so callers must
+// release the buffers explicitly and should not copy the manager while buffers are allocated.
+class StaticMemoryManager
+{
+public:
+  StaticMemoryManager() = delete;
+
+  // Initializes the static memory manager with the precalculated required sizes (in bytes)
+  // of the input buffer, the intermediate computations buffer and the output buffer.
+  // No memory is allocated by the constructor; see the allocate_*_buf() methods.
+  // TODO remove these *_req_size arguments and read the sizes from the circle file
+  explicit StaticMemoryManager(int32_t input_req_size, int32_t buffer_req_size,
+                               int32_t output_req_size)
+    // Initializers are listed in member declaration order, which is the order in which they
+    // are actually executed (avoids -Wreorder diagnostics).
+    : _buffer_ptr(nullptr), _input_buffer_ptr(nullptr), _output_buffer_ptr(nullptr),
+      _input_req_size(input_req_size), _buffer_req_size(buffer_req_size),
+      _output_req_size(output_req_size)
+  { /* Do nothing */
+  }
+
+  // Methods to get a data pointer for a tensor. None of them allocates memory; each returns
+  // the start of the corresponding buffer advanced by `offset` bytes.
+  // To get a pointer for a tensor in _buffer_ptr with the right offset
+  uint8_t *allocate_memory(int32_t offset);
+  // To get a pointer for a tensor in _input_buffer_ptr with the right offset
+  uint8_t *allocate_memory_for_input(int32_t offset);
+  // To get a pointer for a tensor in _output_buffer_ptr with the right offset
+  uint8_t *allocate_memory_for_output(int32_t offset);
+
+  // Methods to allocate the buffers themselves. Each call is a no-op when the buffer
+  // already exists.
+  // To allocate input memory buffer with _input_req_size bytes. Result pointer -
+  // _input_buffer_ptr
+  void allocate_input_buf();
+  // To allocate output memory buffer with _output_req_size bytes. Result pointer -
+  // _output_buffer_ptr
+  void allocate_output_buf();
+  // To allocate intermediate computing memory buffer with _buffer_req_size bytes.
+  // Result pointer - _buffer_ptr
+  void allocate_computing_buf();
+
+  // To delete memory for intermediate computing buffer
+  void release_computing_buf();
+  // To delete memory for input buffer
+  void release_input_buf();
+  // To delete memory for output buffer
+  void release_output_buf();
+
+private:
+  // Pointers to the beginning of the allocated memory buffers (nullptr while not allocated).
+  uint8_t *_buffer_ptr;
+  uint8_t *_input_buffer_ptr;
+  uint8_t *_output_buffer_ptr;
+
+  // Required buffer sizes in bytes.
+  // TODO remove these fields and read the sizes from the circle file
+  int32_t _input_req_size{};
+  int32_t _buffer_req_size{};
+  int32_t _output_req_size{};
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#endif // USE_STATIC_ALLOC
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO Enable it
+
+#if 0
+
+#include "TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+// Gives `tensor` a freshly allocated data buffer sized num_elements * element size and
+// records the buffer in `allocations`. Tensors that are not allocatable are left untouched;
+// a tensor that already has data is first passed to release_memory().
+void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+  if (tensor.is_allocatable())
+  {
+    if (tensor.is_data_allocated())
+    {
+      release_memory(tensor);
+    }
+
+    const auto bytes_per_element = getDataTypeSize(tensor.element_type());
+    const auto element_count = tensor.shape().num_elements();
+
+    uint8_t *const buffer = new uint8_t[element_count * bytes_per_element];
+    allocations.push_back(buffer);
+    tensor.set_data_buffer(buffer);
+  }
+}
+
+// Detaches the data buffer from `tensor` by setting its data pointer to nullptr.
+// This function itself does not call delete on the buffer.
+void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO Enable it
+
+#if 0
+
+#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#include "MemoryManager.h"
+
+namespace luci_interpreter
+{
+// Memory manager intended for kernel tests. It keeps track of every buffer it allocates and
+// frees all of them in its destructor, so tests do not have to delete tensor data manually.
+class TestMemoryManager : public IMemoryManager
+{
+public:
+  void allocate_memory(luci_interpreter::Tensor &tensor) final;
+  void release_memory(luci_interpreter::Tensor &tensor) final;
+
+  // Frees every buffer recorded in `allocations`.
+  ~TestMemoryManager() override
+  {
+    for (auto it = allocations.begin(); it != allocations.end(); ++it)
+    {
+      delete[] *it;
+    }
+  }
+
+private:
+  // Buffers freed by the destructor.
+  std::vector<uint8_t *> allocations;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#endif
--- /dev/null
+# Standalone CMake entry point that builds the luci-interpreter sources located under
+# onert-micro.
+cmake_minimum_required(VERSION 3.15)
+project(luci_interpreter_micro_standalone)
+
+# Helper definitions shared by the onert-micro build (NNAS_ROOT must be set by the caller).
+include(${NNAS_ROOT}/infra/onert-micro/utils.cmake)
+
+# FlatBuffers headers are added to the include path for everything below.
+# NOTE(review): the lookup result is not checked before FlatBuffersSource_DIR is used -
+# confirm that a missing package is handled elsewhere.
+nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
+include_directories(${FlatBuffersSource_DIR}/include)
+
+# TODO: fix luci/plan for new luci-micro without luci/IR
+# The interpreter lives outside this directory, so an explicit binary dir is given.
+add_subdirectory(${NNAS_PROJECT_SOURCE_DIR}/onert-micro/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
--- /dev/null
+# Cross-compilation bootstrap for the mbed-os based example: selects the arm-none-eabi
+# toolchain and a default build type, then declares the project.
+CMAKE_MINIMUM_REQUIRED(VERSION 3.9)
+# "Generic" is CMake's system name for targets without an operating system.
+SET(CMAKE_SYSTEM_NAME Generic)
+SET(CMAKE_CROSSCOMPILING TRUE)
+
+# force compiler settings
+# (marks the compilers as working so CMake does not try to build a test program)
+SET(CMAKE_C_COMPILER_WORKS TRUE)
+SET(CMAKE_CXX_COMPILER_WORKS TRUE)
+
+# force cmake compilers
+SET(CMAKE_ASM_COMPILER "arm-none-eabi-gcc")
+SET(CMAKE_C_COMPILER "arm-none-eabi-gcc")
+SET(CMAKE_CXX_COMPILER "arm-none-eabi-g++")
+# objcopy is used by the POST_BUILD step to convert the ELF image into a raw binary
+SET(ELF2BIN "arm-none-eabi-objcopy")
+
+
+# if the environment does not specify build type, set to Debug
+IF (NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE "Debug"
+ CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
+ FORCE)
+ENDIF ()
+
+# here starts the project
+PROJECT(mbed-os-example-onert-micro C CXX ASM)
+
+# uncomment below to have a verbose build process
+#SET(CMAKE_VERBOSE_MAKEFILE ON)
+
+# System libraries appended to the link flags; wrapped in a linker group so that
+# their mutual references are resolved regardless of order.
+SET(LD_SYS_LIBS "-Wl,--start-group -lstdc++ -lsupc++ -lm -lc -lgcc -lnosys -Wl,--end-group")
+
+
+# C compile flags: Cortex-M7 / Thumb code, FPv5-D16 FPU with softfp ABI, -Og debug build,
+# memory layout macros, and mbed_config.h force-included into every translation unit.
+# NOTE(review): MBED_RAM_START / MBED_RAM_SIZE here (0x20000000 / 0x20000) differ from the
+# values passed in the link flags (0x20000400 / 0x1FC00) - confirm this is intentional.
+SET(CMAKE_C_FLAGS "-g3 -std=gnu11 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fmessage-length=0 -fexceptions -ffunction-sections -fdata-sections")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -funsigned-char -MMD -fomit-frame-pointer -Og -DMBED_DEBUG")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7 -mthumb")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=fpv5-d16 -mfloat-abi=softfp -DMBED_ROM_START=0x8000000")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMBED_ROM_SIZE=0x200000 -DMBED_RAM_START=0x20000000 -DMBED_RAM_SIZE=0x20000")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMBED_RAM1_START=0x24000000 -DMBED_RAM1_SIZE=0x80000")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/mbed_config.h")
+
+# C++ compile flags: same target/layout options as for C, with gnu++14, RTTI and -Wvla.
+SET(CMAKE_CXX_FLAGS "-g3 -std=gnu++14 -frtti -Wvla -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmessage-length=0 -fexceptions -ffunction-sections -fdata-sections")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funsigned-char -MMD -fomit-frame-pointer -Og -DMBED_DEBUG")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mthumb -mfpu=fpv5-d16 -mfloat-abi=softfp -DMBED_ROM_START=0x8000000")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMBED_ROM_SIZE=0x200000 -DMBED_RAM_START=0x20000000 -DMBED_RAM_SIZE=0x20000")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMBED_RAM1_START=0x24000000 -DMBED_RAM1_SIZE=0x80000")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/mbed_config.h")
+
+# Assembler flags: sources are run through the C preprocessor (assembler-with-cpp);
+# no memory layout macros are passed here.
+SET(CMAKE_ASM_FLAGS "-g3 -x assembler-with-cpp -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -fmessage-length=0 -fexceptions -ffunction-sections -fdata-sections")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -funsigned-char -MMD -fomit-frame-pointer -Og -DMBED_DEBUG")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mthumb -mfpu=fpv5-d16 -mfloat-abi=softfp ")
+SET(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/mbed_config.h")
+
+# Link flags.
+# --gc-sections drops unreferenced sections; each --wrap,<sym> redirects calls of <sym> to
+# __wrap_<sym>, which is how the entry point, the newlib allocator entry points and the
+# printf family are intercepted.
+# The allocator wraps must name the reentrant newlib symbols with a single leading
+# underscore (_malloc_r, _free_r, _realloc_r, _memalign_r, _calloc_r), matching the option
+# list used by the PRE_LINK preprocessing command in this file; a doubled underscore names
+# a different symbol and leaves memalign/calloc unwrapped.
+SET(CMAKE_CXX_LINK_FLAGS "-Wl,--gc-sections -Wl,--wrap,main -Wl,--wrap,_malloc_r -Wl,--wrap,_free_r")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,_realloc_r -Wl,--wrap,_memalign_r -Wl,--wrap,_calloc_r")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,exit -Wl,--wrap,atexit -Wl,-n -Wl,--wrap,printf")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,sprintf -Wl,--wrap,snprintf -Wl,--wrap,vprintf")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,vsprintf -Wl,--wrap,vsnprintf -Wl,--wrap,fprintf")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wl,--wrap,vfprintf -mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -mfloat-abi=softfp -Wall -Wextra -Wno-unused-parameter")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -Wno-missing-field-initializers -fmessage-length=0 -fexceptions")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -ffunction-sections -fdata-sections -funsigned-char -MMD")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fomit-frame-pointer -Og -DMBED_DEBUG -DMBED_TRAP_ERRORS_ENABLED=1")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -mfloat-abi=softfp -DMBED_ROM_START=0x8000000")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_ROM_SIZE=0x200000 -DMBED_RAM_START=0x20000400")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_RAM_SIZE=0x1FC00 -DMBED_RAM1_START=0x24000000")
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -DMBED_RAM1_SIZE=0x80000 -DMBED_BOOT_STACK_SIZE=1024 -DXIP_ENABLE=0")
+# The preprocessed linker script is written to CMAKE_CURRENT_BINARY_DIR by the PRE_LINK
+# step of build_test, so the same directory variable is used to locate it here.
+SET(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} ${LD_SYS_LIBS} -T ${CMAKE_CURRENT_BINARY_DIR}/build_test_pp.link_script.ld")
+
+# Preprocessor definitions applied to every target declared below: enabled device
+# features (DEVICE_*), target/MCU identification (TARGET_*, STM32H743xx), toolchain
+# markers (TOOLCHAIN_*) and CMSIS/mbed configuration switches.
+# NOTE(review): MBED_BUILD_TIMESTAMP is a fixed literal rather than the actual build time,
+# and TARGET_RELEASE is defined although the compile flags use -Og/-DMBED_DEBUG - confirm.
+ADD_DEFINITIONS(
+ -DARM_MATH_CM7
+ -DCOMPONENT_FLASHIAP=1
+ -DDEVICE_ANALOGIN=1
+ -DDEVICE_ANALOGOUT=1
+ -DDEVICE_CAN=1
+ -DDEVICE_CRC=1
+ -DDEVICE_EMAC=1
+ -DDEVICE_FLASH=1
+ -DDEVICE_I2C=1
+ -DDEVICE_I2CSLAVE=1
+ -DDEVICE_I2C_ASYNCH=1
+ -DDEVICE_INTERRUPTIN=1
+ -DDEVICE_LPTICKER=1
+ -DDEVICE_MPU=1
+ -DDEVICE_PORTIN=1
+ -DDEVICE_PORTINOUT=1
+ -DDEVICE_PORTOUT=1
+ -DDEVICE_PWMOUT=1
+ -DDEVICE_RESET_REASON=1
+ -DDEVICE_RTC=1
+ -DDEVICE_SERIAL=1
+ -DDEVICE_SERIAL_FC=1
+ -DDEVICE_SLEEP=1
+ -DDEVICE_SPI=1
+ -DDEVICE_SPISLAVE=1
+ -DDEVICE_SPI_ASYNCH=1
+ -DDEVICE_STDIO_MESSAGES=1
+ -DDEVICE_TRNG=1
+ -DDEVICE_USBDEVICE=1
+ -DDEVICE_USTICKER=1
+ -DDEVICE_WATCHDOG=1
+ -DEXTRA_IDLE_STACK_REQUIRED
+ -DMBED_BUILD_TIMESTAMP=1640167847.81
+ -DMBED_TICKLESS
+ -DSTM32H743xx
+ -DTARGET_CORTEX
+ -DTARGET_CORTEX_M
+ -DTARGET_FF_ARDUINO_UNO
+ -DTARGET_LIKE_CORTEX_M7
+ -DTARGET_LIKE_MBED
+ -DTARGET_M7
+ -DTARGET_MCU_STM32
+ -DTARGET_MCU_STM32H7
+ -DTARGET_MCU_STM32H743xI
+ -DTARGET_NAME=NUCLEO_H743ZI2
+ -DTARGET_NUCLEO_H743ZI2
+ -DTARGET_RELEASE
+ -DTARGET_RTOS_M4_M7
+ -DTARGET_STM
+ -DTARGET_STM32H7
+ -DTARGET_STM32H743xI
+ -DTOOLCHAIN_GCC
+ -DTOOLCHAIN_GCC_ARM
+ -DTRANSACTION_QUEUE_SIZE_SPI=2
+ -DUSE_FULL_LL_DRIVER
+ -DUSE_HAL_DRIVER
+ -D__CMSIS_RTOS
+ -D__CORTEX_M7
+ -D__FPU_PRESENT=1
+ -D__MBED_CMSIS_RTOS_CM
+ -D__MBED__=1
+ -DMBED_MEM_TRACING_ENABLED=0
+)
+
+# Target definitions: a static mbed_os library built from the mbed-os sources, and the
+# build_test executable that links against it together with the prebuilt luci-interpreter
+# static libraries.
+include(mbed-sources.cmake)
+
+# set_sources_mbed() fills SOURCES with the mbed-os file list rooted at MbedOSSource_DIR;
+# the startup assembly file kept next to this CMakeLists is appended to it.
+set_sources_mbed(${MbedOSSource_DIR})
+list(APPEND SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/startup_stm32h743xx.S")
+
+
+add_library(mbed_os STATIC ${SOURCES})
+
+target_include_directories_mbed(mbed_os ${MbedOSSource_DIR})
+
+
+SET_TARGET_PROPERTIES(mbed_os PROPERTIES ENABLE_EXPORTS 1)
+# add syslibs dependencies to create the correct linker order
+TARGET_LINK_LIBRARIES(mbed_os -lstdc++ -lsupc++ -lm -lc -lgcc -lnosys)
+
+add_executable(build_test main.cpp)
+
+target_link_libraries(build_test mbed_os)
+target_include_directories_mbed(build_test ${MbedOSSource_DIR})
+
+# Prebuilt luci-interpreter archives from MICRO_ARM_BUILD_DIR. They are attached to mbed_os,
+# so build_test receives them through its dependency on mbed_os.
+# NOTE(review): the archives are referenced by path only; nothing here builds them - confirm
+# they are produced before this project is configured and built.
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/core/reader/libluci_micro_circle_reader.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/core/libluci_interpreter_core_micro.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/kernels/libluci_interpreter_kernels_micro.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/kernels/libluci_interpreter_mcu_pal.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/loader/libluci_interpreter_loader_micro.a")
+target_link_libraries(mbed_os "${MICRO_ARM_BUILD_DIR}/luci-interpreter/src/libluci_interpreter_micro.a")
+
+# Headers needed by main.cpp: luci-interpreter public headers, this directory, FlatBuffers.
+target_include_directories(build_test PRIVATE
+ ${ONERTMICRO_SRC_DIR}/luci-interpreter/include
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${FlatBuffersSource_DIR}/include
+ )
+
+# Before build_test is linked, run the C preprocessor (-E -P) over the STM32H743xI linker
+# script from mbed-os and write the result to build_test_pp.link_script.ld in the current
+# binary directory. The -DMBED_* macros provide the memory layout values passed to the
+# script.
+# NOTE(review): the -Wl,... and code generation options appear to be carried over from the
+# link flags; confirm whether the preprocessor step needs them.
+add_custom_command(TARGET build_test PRE_LINK
+ COMMAND "arm-none-eabi-cpp" -E -P -Wl,--gc-sections -Wl,--wrap,main -Wl,--wrap,_malloc_r
+ -Wl,--wrap,_free_r -Wl,--wrap,_realloc_r -Wl,--wrap,_memalign_r -Wl,--wrap,_calloc_r
+ -Wl,--wrap,exit -Wl,--wrap,atexit -Wl,-n -Wl,--wrap,printf -Wl,--wrap,sprintf
+ -Wl,--wrap,snprintf -Wl,--wrap,vprintf -Wl,--wrap,vsprintf -Wl,--wrap,vsnprintf
+ -Wl,--wrap,fprintf -Wl,--wrap,vfprintf -mcpu=cortex-m7 -mthumb -mfpu=fpv5-d16 -mfloat-abi=softfp
+ -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -fmessage-length=0
+ -fexceptions -ffunction-sections -fdata-sections -funsigned-char -MMD -fomit-frame-pointer
+ -Og -DMBED_DEBUG -DMBED_TRAP_ERRORS_ENABLED=1 -DMBED_MINIMAL_PRINTF -mcpu=cortex-m7 -mthumb
+ -mfpu=fpv5-d16 -mfloat-abi=softfp -DMBED_ROM_START=0x8000000 -DMBED_ROM_SIZE=0x200000
+ -DMBED_RAM_START=0x20000400 -DMBED_RAM_SIZE=0x1FC00 -DMBED_RAM1_START=0x24000000 -DMBED_RAM1_SIZE=0x80000
+ -DMBED_BOOT_STACK_SIZE=1024 -DXIP_ENABLE=0
+ ${MbedOSSource_DIR}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TOOLCHAIN_GCC_ARM/STM32H743xI.ld
+ -o ${CMAKE_CURRENT_BINARY_DIR}/build_test_pp.link_script.ld
+
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/build_test_pp.link_script.ld"
+ )
+
+# After build_test is built, convert the ELF image into a raw binary placed next to it
+# (<target file>.bin) and print the path of the produced file.
+add_custom_command(TARGET build_test POST_BUILD
+ COMMAND ${ELF2BIN} -O binary $<TARGET_FILE:build_test> $<TARGET_FILE:build_test>.bin
+ COMMAND ${CMAKE_COMMAND} -E echo "-- built: $<TARGET_FILE:build_test>.bin"
+ )
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// base app for qemu
+// Fixed address that uart_print() writes characters to.
+static volatile unsigned int *const UART_DR = (unsigned int *)0x40011004;
+
+// Writes each character of the NUL-terminated string `s` to UART_DR, in order.
+void uart_print(const char *s)
+{
+  for (const char *cursor = s; *cursor != '\0'; ++cursor)
+  {
+    *UART_DR = *cursor;
+  }
+}
+
+// Entry point: prints a greeting through uart_print() and exits with status 0.
+int main()
+{
+  uart_print("Hello, World!\n");
+  return 0;
+}
--- /dev/null
+macro(set_sources_mbed)
+ set(SOURCES
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Include/cmsis_os2.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Include/os_tick.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Config/RTX_Config.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Config/RTX_Config.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include/rtx_def.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include/rtx_evr.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include/rtx_os.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include1/cmsis_os.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Library/cmsis_os1.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/TOOLCHAIN_GCC/TARGET_RTOS_M4_M7/irq_cm4f.S
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_core_c.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_core_ca.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_core_cm.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_delay.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_evflags.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_evr.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_kernel.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_lib.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_lib.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_memory.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_mempool.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_msgqueue.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_mutex.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_semaphore.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_system.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_thread.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source/rtx_timer.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Source/os_systick.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/RTOS2/Source/os_tick_ptim.c
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cachel1_armv7.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_armcc.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_armclang.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_armclang_ltm.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_compiler.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_gcc.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_iccarm.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/cmsis_version.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_armv81mml.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_armv8mbl.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_armv8mml.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm0.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm0plus.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm1.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm23.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm3.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm33.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm35p.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm4.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm55.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_cm7.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_sc000.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/core_sc300.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/mpu_armv7.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/mpu_armv8.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/pmu_armv8.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include/tz_context.h
+ ${ARGV0}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Source/mbed_tz_context.c
+ ${ARGV0}/cmsis/device/RTE/include/RTE_Components.h
+ ${ARGV0}/cmsis/device/mbed_cmsis_conf.h
+ ${ARGV0}/cmsis/device/rtos/TOOLCHAIN_GCC_ARM/mbed_boot_gcc_arm.c
+ ${ARGV0}/cmsis/device/rtos/include/mbed_boot.h
+ ${ARGV0}/cmsis/device/rtos/include/mbed_rtx_conf.h
+ ${ARGV0}/cmsis/device/rtos/include/mbed_rtx_storage.h
+ ${ARGV0}/cmsis/device/rtos/source/mbed_boot.c
+ ${ARGV0}/cmsis/device/rtos/source/mbed_rtos_rtx.c
+ ${ARGV0}/cmsis/device/rtos/source/mbed_rtx_handlers.c
+ ${ARGV0}/cmsis/device/rtos/source/mbed_rtx_idle.cpp
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/ATHandler.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularContext.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularDevice.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularInformation.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularNetwork.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/API/CellularSMS.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularContext.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularDevice.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularInformation.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularNetwork.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularSMS.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_CellularStack.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/AT/AT_ControlPlane_netif.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/common/APN_db.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularCommon.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularList.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularLog.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/common/CellularUtil.h
+ ${ARGV0}/connectivity/cellular/include/cellular/framework/device/CellularStateMachine.h
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularContext.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularDevice.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularInformation.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularNetwork.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularSMS.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_CellularStack.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/AT/AT_ControlPlane_netif.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/common/APN_db.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/common/CellularLog.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/common/CellularUtil.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/device/ATHandler.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/device/CellularContext.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/device/CellularDevice.cpp
+ ${ARGV0}/connectivity/cellular/source/framework/device/CellularStateMachine.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/atmel-rf-driver/NanostackRfPhyAtmel.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/AT86RF215Reg.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/AT86RFReg.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/NanostackRfPhyAT86RF215.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/NanostackRfPhyAtmel.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/at24mac.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/at24mac.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source/rfbits.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/mcr20a-rf-driver/NanostackRfPhyMcr20a.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Drv.c
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Drv.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Overwrites.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/MCR20Reg.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/NanostackRfPhyMcr20a.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source/XcvrSpi.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/NanostackRfPhys2lp.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/at24mac_s2lp.cpp
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/at24mac_s2lp.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/rf_configuration.c
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/rf_configuration.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source/s2lpReg.h
+ ${ARGV0}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/stm-s2lp-rf-driver/NanostackRfPhys2lp.h
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP.cpp
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP.h
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/Altair/ALT1250/PPP/ALT1250_PPP_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION.cpp
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION.h
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularInformation.cpp
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularInformation.h
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/GEMALTO/CINTERION/GEMALTO_CINTERION_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/GENERIC/GENERIC_AT3GPP/GENERIC_AT3GPP.cpp
+ ${ARGV0}/connectivity/drivers/cellular/GENERIC/GENERIC_AT3GPP/GENERIC_AT3GPP.h
+ ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP.cpp
+ ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP.h
+ ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP/SARA4_PPP_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularInformation.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularInformation.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BC95/QUECTEL_BC95_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularInformation.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularInformation.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_ControlPlane_netif.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/BG96/QUECTEL_BG96_ControlPlane_netif.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/EC2X/QUECTEL_EC2X.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/EC2X/QUECTEL_EC2X.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularInformation.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularInformation.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/M26/QUECTEL_M26_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96.h
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/QUECTEL/UG96/QUECTEL_UG96_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT.cpp
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT.h
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/RiotMicro/AT/RM1000_AT_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/HE910/TELIT_HE910.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/HE910/TELIT_HE910.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME310/TELIT_ME310_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/TELIT/ME910/TELIT_ME910_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/AT/UBLOX_AT_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularContext.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularContext.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularNetwork.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularNetwork.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularSMS.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularSMS.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularStack.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/N2XX/UBLOX_N2XX_CellularStack.h
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/PPP/UBLOX_PPP.cpp
+ ${ARGV0}/connectivity/drivers/cellular/UBLOX/PPP/UBLOX_PPP.h
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/TARGET_NUCLEO_H743ZI2/stm32h7_eth_init.c
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/lan8742/lan8742.c
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/lan8742/lan8742.h
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/stm32xx_emac_config.h
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/stm32xx_emac.cpp
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/stm32xx_emac.h
+ ${ARGV0}/connectivity/drivers/emac/TARGET_STM/stm32xx_eth_irq_callback.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt_stm32l4.c
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/aes_alt_stm32l4.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/ccm_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/ccm_alt.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/cryp_stm32.c
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/cryp_stm32.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/gcm_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/gcm_alt.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/hash_stm32.c
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/hash_stm32.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/md5_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/md5_alt.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha1_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha1_alt.h
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha256_alt.cpp
+ ${ARGV0}/connectivity/drivers/mbedtls/TARGET_STM/sha256_alt.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/include/nfc/controllers/PN512Driver.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/include/nfc/controllers/PN512SPITransportDriver.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/include/nfc/controllers/PN512TransportDriver.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/PN512Driver.cpp
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/PN512SPITransportDriver.cpp
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/PN512TransportDriver.cpp
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_callback.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_cmd.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_cmd.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_hw.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_hw.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_internal.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_irq.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_irq.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_poll.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_poll.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_registers.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_registers.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_rf.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_rf.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_timer.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_timer.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_transceive.c
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_transceive.h
+ ${ARGV0}/connectivity/drivers/nfc/PN512/source/transceiver/pn512_types.h
+ ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266/ESP8266.cpp
+ ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266/ESP8266.h
+ ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266Interface.cpp
+ ${ARGV0}/connectivity/drivers/wifi/esp8266-driver/ESP8266Interface.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/mbed-coap/sn_coap_header.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/mbed-coap/sn_coap_protocol.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/mbed-coap/sn_config.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/include/sn_coap_header_internal.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/include/sn_coap_protocol_internal.h
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_builder.c
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_header_check.c
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_parser.c
+ ${ARGV0}/connectivity/libraries/mbed-coap/source/sn_coap_protocol.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/common_functions.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ip4string.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ip6string.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ip_fsc.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ns_list.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ns_nvm_helper.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/ns_types.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/nsdynmemLIB.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/nsdynmem_tracker.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/nsdynmem_tracker_lib.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/platform/arm_hal_interrupt.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/platform/arm_hal_nvm.h
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/IPv6_fcf_lib/ip_fsc.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libBits/common_functions.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libList/ns_list.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip4string/ip4tos.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip4string/stoip4.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip6string/ip6tos.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/libip6string/stoip6.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/nsdynmemLIB/nsdynmemLIB.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/nsdynmemtracker/nsdynmem_tracker_lib.c
+ ${ARGV0}/connectivity/libraries/nanostack-libservice/source/nvmHelper/ns_nvm_helper.c
+ ${ARGV0}/connectivity/libraries/ppp/include/polarssl/arc4.h
+ ${ARGV0}/connectivity/libraries/ppp/include/polarssl/des.h
+ ${ARGV0}/connectivity/libraries/ppp/include/polarssl/md4.h
+ ${ARGV0}/connectivity/libraries/ppp/include/polarssl/md5.h
+ ${ARGV0}/connectivity/libraries/ppp/include/polarssl/sha1.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ccp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/chap-md5.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/chap-new.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/chap_ms.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/eap.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ecp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/eui64.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/fsm.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ipcp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ipv6cp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/lcp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/magic.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/mppe.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_impl.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_opts.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_service.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/ppp_service_if.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppapi.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppcrypt.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppdebug.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppoe.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppol2tp.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/pppos.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/upap.h
+ ${ARGV0}/connectivity/libraries/ppp/include/ppp/vj.h
+ ${ARGV0}/connectivity/libraries/ppp/source/auth.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ccp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/chap-md5.c
+ ${ARGV0}/connectivity/libraries/ppp/source/chap-new.c
+ ${ARGV0}/connectivity/libraries/ppp/source/chap_ms.c
+ ${ARGV0}/connectivity/libraries/ppp/source/demand.c
+ ${ARGV0}/connectivity/libraries/ppp/source/eap.c
+ ${ARGV0}/connectivity/libraries/ppp/source/eui64.c
+ ${ARGV0}/connectivity/libraries/ppp/source/fsm.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ipcp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ipv6cp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/lcp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/magic.c
+ ${ARGV0}/connectivity/libraries/ppp/source/mppe.c
+ ${ARGV0}/connectivity/libraries/ppp/source/multilink.c
+ ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_arc4.c
+ ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_des.c
+ ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_md4.c
+ ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_md5.c
+ ${ARGV0}/connectivity/libraries/ppp/source/polarssl/ppp_sha1.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ppp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ppp_ecp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/ppp_service.cpp
+ ${ARGV0}/connectivity/libraries/ppp/source/ppp_service_if.cpp
+ ${ARGV0}/connectivity/libraries/ppp/source/pppapi.c
+ ${ARGV0}/connectivity/libraries/ppp/source/pppcrypt.c
+ ${ARGV0}/connectivity/libraries/ppp/source/pppoe.c
+ ${ARGV0}/connectivity/libraries/ppp/source/pppol2tp.c
+ ${ARGV0}/connectivity/libraries/ppp/source/pppos.cpp
+ ${ARGV0}/connectivity/libraries/ppp/source/upap.c
+ ${ARGV0}/connectivity/libraries/ppp/source/utils.c
+ ${ARGV0}/connectivity/libraries/ppp/source/vj.c
+ ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaRadio.h
+ ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaWANBase.h
+ ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaWANInterface.h
+ ${ARGV0}/connectivity/lorawan/include/lorawan/LoRaWANStack.h
+ ${ARGV0}/connectivity/lorawan/include/lorawan/lorawan_types.h
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMac.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMac.h
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacChannelPlan.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacChannelPlan.h
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCommand.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCommand.h
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCrypto.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/mac/LoRaMacCrypto.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHY.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHY.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAS923.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAS923.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAU915.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYAU915.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN470.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN470.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN779.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYCN779.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU433.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU433.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU868.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYEU868.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYIN865.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYIN865.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYKR920.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYKR920.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYUS915.cpp
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/LoRaPHYUS915.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/lora_phy_ds.h
+ ${ARGV0}/connectivity/lorawan/lorastack/phy/loraphy_target.h
+ ${ARGV0}/connectivity/lorawan/source/LoRaWANInterface.cpp
+ ${ARGV0}/connectivity/lorawan/source/LoRaWANStack.cpp
+ ${ARGV0}/connectivity/lorawan/system/LoRaWANTimer.cpp
+ ${ARGV0}/connectivity/lorawan/system/LoRaWANTimer.h
+ ${ARGV0}/connectivity/lorawan/system/lorawan_data_structures.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFC.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCController.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCControllerDriver.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCDefinitions.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCEEPROM.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCEEPROMDriver.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCNDEFCapable.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCRemoteEndpoint.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCRemoteInitiator.h
+ ${ARGV0}/connectivity/nfc/include/nfc/NFCTarget.h
+ ${ARGV0}/connectivity/nfc/include/nfc/Type4RemoteInitiator.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/MessageBuilder.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/MessageParser.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/Record.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/RecordParser.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/Mime.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/SimpleMessageParser.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/Text.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/URI.h
+ ${ARGV0}/connectivity/nfc/include/nfc/ndef/common/util.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_buffer.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_buffer_builder.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_buffer_reader.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_debug.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_macros.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/acore/ac_stream.h
+ ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_buffer.c
+ ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_buffer_builder.c
+ ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_buffer_reader.c
+ ${ARGV0}/connectivity/nfc/libraries/acore/source/ac_stream.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/ndef/ndef.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/ndef/ndef.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/nfc_common.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/nfc_errors.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_debug.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_scheduler.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_scheduler.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_transport.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/platform/nfc_transport.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816_app.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816_app.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/iso7816/iso7816_defs.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/isodep/isodep.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/isodep/isodep_target.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/isodep/isodep_target.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/type4/type4_target.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/tech/type4/type4_target.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/protocols.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/transceiver.c
+ ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/transceiver.h
+ ${ARGV0}/connectivity/nfc/libraries/stack/transceiver/transceiver_internal.h
+ ${ARGV0}/connectivity/nfc/source/NFCController.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCControllerDriver.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCEEPROM.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCEEPROMDriver.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCNDEFCapable.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCRemoteEndpoint.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCRemoteInitiator.cpp
+ ${ARGV0}/connectivity/nfc/source/NFCTarget.cpp
+ ${ARGV0}/connectivity/nfc/source/Type4RemoteInitiator.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/MessageBuilder.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/MessageParser.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/RecordParser.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/common/Mime.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/common/SimpleMessageParser.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/common/Text.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/common/URI.cpp
+ ${ARGV0}/connectivity/nfc/source/ndef/common/util.cpp
+ ${ARGV0}/drivers/device_key/include/device_key/DeviceKey.h
+ ${ARGV0}/drivers/device_key/source/DeviceKey.cpp
+ ${ARGV0}/drivers/include/drivers/AnalogIn.h
+ ${ARGV0}/drivers/include/drivers/AnalogOut.h
+ ${ARGV0}/drivers/include/drivers/BufferedSerial.h
+ ${ARGV0}/drivers/include/drivers/BusIn.h
+ ${ARGV0}/drivers/include/drivers/BusInOut.h
+ ${ARGV0}/drivers/include/drivers/BusOut.h
+ ${ARGV0}/drivers/include/drivers/CAN.h
+ ${ARGV0}/drivers/include/drivers/DigitalIn.h
+ ${ARGV0}/drivers/include/drivers/DigitalInOut.h
+ ${ARGV0}/drivers/include/drivers/DigitalOut.h
+ ${ARGV0}/drivers/include/drivers/FlashIAP.h
+ ${ARGV0}/drivers/include/drivers/HighResClock.h
+ ${ARGV0}/drivers/include/drivers/I2C.h
+ ${ARGV0}/drivers/include/drivers/I2CSlave.h
+ ${ARGV0}/drivers/include/drivers/InterruptIn.h
+ ${ARGV0}/drivers/include/drivers/LowPowerClock.h
+ ${ARGV0}/drivers/include/drivers/LowPowerTicker.h
+ ${ARGV0}/drivers/include/drivers/LowPowerTimeout.h
+ ${ARGV0}/drivers/include/drivers/LowPowerTimer.h
+ ${ARGV0}/drivers/include/drivers/MbedCRC.h
+ ${ARGV0}/drivers/include/drivers/OSPI.h
+ ${ARGV0}/drivers/include/drivers/PortIn.h
+ ${ARGV0}/drivers/include/drivers/PortInOut.h
+ ${ARGV0}/drivers/include/drivers/PortOut.h
+ ${ARGV0}/drivers/include/drivers/PwmOut.h
+ ${ARGV0}/drivers/include/drivers/QSPI.h
+ ${ARGV0}/drivers/include/drivers/RawCAN.h
+ ${ARGV0}/drivers/include/drivers/RealTimeClock.h
+ ${ARGV0}/drivers/include/drivers/ResetReason.h
+ ${ARGV0}/drivers/include/drivers/SPI.h
+ ${ARGV0}/drivers/include/drivers/SPISlave.h
+ ${ARGV0}/drivers/include/drivers/SerialBase.h
+ ${ARGV0}/drivers/include/drivers/SerialWireOutput.h
+ ${ARGV0}/drivers/include/drivers/Ticker.h
+ ${ARGV0}/drivers/include/drivers/TickerDataClock.h
+ ${ARGV0}/drivers/include/drivers/Timeout.h
+ ${ARGV0}/drivers/include/drivers/Timer.h
+ ${ARGV0}/drivers/include/drivers/TimerEvent.h
+ ${ARGV0}/drivers/include/drivers/UnbufferedSerial.h
+ ${ARGV0}/drivers/include/drivers/Watchdog.h
+ ${ARGV0}/drivers/include/drivers/interfaces/InterfaceCAN.h
+ ${ARGV0}/drivers/include/drivers/interfaces/InterfaceDigitalIn.h
+ ${ARGV0}/drivers/include/drivers/interfaces/InterfaceDigitalInOut.h
+ ${ARGV0}/drivers/include/drivers/interfaces/InterfaceDigitalOut.h
+ ${ARGV0}/drivers/source/AnalogIn.cpp
+ ${ARGV0}/drivers/source/AnalogOut.cpp
+ ${ARGV0}/drivers/source/BufferedSerial.cpp
+ ${ARGV0}/drivers/source/BusIn.cpp
+ ${ARGV0}/drivers/source/BusInOut.cpp
+ ${ARGV0}/drivers/source/BusOut.cpp
+ ${ARGV0}/drivers/source/CAN.cpp
+ ${ARGV0}/drivers/source/DigitalIn.cpp
+ ${ARGV0}/drivers/source/DigitalInOut.cpp
+ ${ARGV0}/drivers/source/DigitalOut.cpp
+ ${ARGV0}/drivers/source/FlashIAP.cpp
+ ${ARGV0}/drivers/source/I2C.cpp
+ ${ARGV0}/drivers/source/I2CSlave.cpp
+ ${ARGV0}/drivers/source/InterruptIn.cpp
+ ${ARGV0}/drivers/source/MbedCRC.cpp
+ ${ARGV0}/drivers/source/OSPI.cpp
+ ${ARGV0}/drivers/source/PortIn.cpp
+ ${ARGV0}/drivers/source/PortInOut.cpp
+ ${ARGV0}/drivers/source/PortOut.cpp
+ ${ARGV0}/drivers/source/PwmOut.cpp
+ ${ARGV0}/drivers/source/QSPI.cpp
+ ${ARGV0}/drivers/source/ResetReason.cpp
+ ${ARGV0}/drivers/source/SPI.cpp
+ ${ARGV0}/drivers/source/SPISlave.cpp
+ ${ARGV0}/drivers/source/SerialBase.cpp
+ ${ARGV0}/drivers/source/SerialWireOutput.cpp
+ ${ARGV0}/drivers/source/Ticker.cpp
+ ${ARGV0}/drivers/source/Timeout.cpp
+ ${ARGV0}/drivers/source/Timer.cpp
+ ${ARGV0}/drivers/source/TimerEvent.cpp
+ ${ARGV0}/drivers/source/UnbufferedSerial.cpp
+ ${ARGV0}/drivers/source/Watchdog.cpp
+ ${ARGV0}/drivers/usb/include/usb/USBAudio.h
+ ${ARGV0}/drivers/usb/include/usb/USBCDC.h
+ ${ARGV0}/drivers/usb/include/usb/USBCDC_ECM.h
+ ${ARGV0}/drivers/usb/include/usb/USBHID.h
+ ${ARGV0}/drivers/usb/include/usb/USBKeyboard.h
+ ${ARGV0}/drivers/usb/include/usb/USBMIDI.h
+ ${ARGV0}/drivers/usb/include/usb/USBMSD.h
+ ${ARGV0}/drivers/usb/include/usb/USBMouse.h
+ ${ARGV0}/drivers/usb/include/usb/USBMouseKeyboard.h
+ ${ARGV0}/drivers/usb/include/usb/USBSerial.h
+ ${ARGV0}/drivers/usb/include/usb/internal/AsyncOp.h
+ ${ARGV0}/drivers/usb/include/usb/internal/ByteBuffer.h
+ ${ARGV0}/drivers/usb/include/usb/internal/EndpointResolver.h
+ ${ARGV0}/drivers/usb/include/usb/internal/LinkEntry.h
+ ${ARGV0}/drivers/usb/include/usb/internal/LinkedList.h
+ ${ARGV0}/drivers/usb/include/usb/internal/LinkedListBase.h
+ ${ARGV0}/drivers/usb/include/usb/internal/MIDIMessage.h
+ ${ARGV0}/drivers/usb/include/usb/internal/OperationList.h
+ ${ARGV0}/drivers/usb/include/usb/internal/OperationListBase.h
+ ${ARGV0}/drivers/usb/include/usb/internal/PolledQueue.h
+ ${ARGV0}/drivers/usb/include/usb/internal/Task.h
+ ${ARGV0}/drivers/usb/include/usb/internal/TaskBase.h
+ ${ARGV0}/drivers/usb/include/usb/internal/TaskQueue.h
+ ${ARGV0}/drivers/usb/include/usb/internal/USBAudio_Types.h
+ ${ARGV0}/drivers/usb/include/usb/internal/USBDescriptor.h
+ ${ARGV0}/drivers/usb/include/usb/internal/USBDevice.h
+ ${ARGV0}/drivers/usb/include/usb/internal/USBDevice_Types.h
+ ${ARGV0}/drivers/usb/include/usb/internal/USBHID_Types.h
+ ${ARGV0}/drivers/usb/source/AsyncOp.cpp
+ ${ARGV0}/drivers/usb/source/ByteBuffer.cpp
+ ${ARGV0}/drivers/usb/source/EndpointResolver.cpp
+ ${ARGV0}/drivers/usb/source/LinkedListBase.cpp
+ ${ARGV0}/drivers/usb/source/OperationListBase.cpp
+ ${ARGV0}/drivers/usb/source/PolledQueue.cpp
+ ${ARGV0}/drivers/usb/source/TaskBase.cpp
+ ${ARGV0}/drivers/usb/source/USBAudio.cpp
+ ${ARGV0}/drivers/usb/source/USBCDC.cpp
+ ${ARGV0}/drivers/usb/source/USBCDC_ECM.cpp
+ ${ARGV0}/drivers/usb/source/USBDevice.cpp
+ ${ARGV0}/drivers/usb/source/USBHID.cpp
+ ${ARGV0}/drivers/usb/source/USBKeyboard.cpp
+ ${ARGV0}/drivers/usb/source/USBMIDI.cpp
+ ${ARGV0}/drivers/usb/source/USBMSD.cpp
+ ${ARGV0}/drivers/usb/source/USBMouse.cpp
+ ${ARGV0}/drivers/usb/source/USBMouseKeyboard.cpp
+ ${ARGV0}/drivers/usb/source/USBSerial.cpp
+ ${ARGV0}/events/include/events/Event.h
+ ${ARGV0}/events/include/events/EventQueue.h
+ ${ARGV0}/events/include/events/UserAllocatedEvent.h
+ ${ARGV0}/events/include/events/equeue.h
+ ${ARGV0}/events/include/events/internal/equeue_platform.h
+ ${ARGV0}/events/include/events/mbed_events.h
+ ${ARGV0}/events/include/events/mbed_shared_queues.h
+ ${ARGV0}/events/source/EventQueue.cpp
+ ${ARGV0}/events/source/equeue.c
+ ${ARGV0}/events/source/equeue_mbed.cpp
+ ${ARGV0}/events/source/equeue_posix.c
+ ${ARGV0}/events/source/mbed_shared_queues.cpp
+ ${ARGV0}/features/frameworks/greentea-client/greentea-client/greentea_metrics.h
+ ${ARGV0}/features/frameworks/greentea-client/greentea-client/test_env.h
+ ${ARGV0}/features/frameworks/greentea-client/source/greentea_metrics.cpp
+ ${ARGV0}/features/frameworks/greentea-client/source/greentea_test_env.cpp
+ ${ARGV0}/features/frameworks/mbed-client-cli/mbed-client-cli/ns_cmdline.h
+ ${ARGV0}/features/frameworks/mbed-client-cli/source/ns_cmdline.c
+ ${ARGV0}/features/frameworks/mbed-greentea-io/mbed_io.cpp
+ ${ARGV0}/features/frameworks/unity/source/unity.c
+ ${ARGV0}/features/frameworks/unity/unity/unity.h
+ ${ARGV0}/features/frameworks/unity/unity/unity_config.h
+ ${ARGV0}/features/frameworks/unity/unity/unity_internals.h
+ ${ARGV0}/features/frameworks/utest/mbed-utest-shim.cpp
+ ${ARGV0}/features/frameworks/utest/source/unity_handler.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_case.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_default_handlers.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_greentea_handlers.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_harness.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_print.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_shim.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_stack_trace.cpp
+ ${ARGV0}/features/frameworks/utest/source/utest_types.cpp
+ ${ARGV0}/features/frameworks/utest/utest/unity_handler.h
+ ${ARGV0}/features/frameworks/utest/utest/utest.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_case.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_default_handlers.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_harness.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_print.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_scheduler.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_shim.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_specification.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_stack_trace.h
+ ${ARGV0}/features/frameworks/utest/utest/utest_types.h
+ ${ARGV0}/hal/include/hal/LowPowerTickerWrapper.h
+ ${ARGV0}/hal/include/hal/PinNameAliases.h
+ ${ARGV0}/hal/include/hal/analogin_api.h
+ ${ARGV0}/hal/include/hal/analogout_api.h
+ ${ARGV0}/hal/include/hal/buffer.h
+ ${ARGV0}/hal/include/hal/can_api.h
+ ${ARGV0}/hal/include/hal/can_helper.h
+ ${ARGV0}/hal/include/hal/crc_api.h
+ ${ARGV0}/hal/include/hal/critical_section_api.h
+ ${ARGV0}/hal/include/hal/dma_api.h
+ ${ARGV0}/hal/include/hal/flash_api.h
+ ${ARGV0}/hal/include/hal/gpio_api.h
+ ${ARGV0}/hal/include/hal/gpio_irq_api.h
+ ${ARGV0}/hal/include/hal/i2c_api.h
+ ${ARGV0}/hal/include/hal/itm_api.h
+ ${ARGV0}/hal/include/hal/lp_ticker_api.h
+ ${ARGV0}/hal/include/hal/mbed_lp_ticker_wrapper.h
+ ${ARGV0}/hal/include/hal/mpu_api.h
+ ${ARGV0}/hal/include/hal/ospi_api.h
+ ${ARGV0}/hal/include/hal/pinmap.h
+ ${ARGV0}/hal/include/hal/port_api.h
+ ${ARGV0}/hal/include/hal/pwmout_api.h
+ ${ARGV0}/hal/include/hal/qspi_api.h
+ ${ARGV0}/hal/include/hal/reset_reason_api.h
+ ${ARGV0}/hal/include/hal/rtc_api.h
+ ${ARGV0}/hal/include/hal/serial_api.h
+ ${ARGV0}/hal/include/hal/sleep_api.h
+ ${ARGV0}/hal/include/hal/spi_api.h
+ ${ARGV0}/hal/include/hal/static_pinmap.h
+ ${ARGV0}/hal/include/hal/ticker_api.h
+ ${ARGV0}/hal/include/hal/trng_api.h
+ ${ARGV0}/hal/include/hal/us_ticker_api.h
+ ${ARGV0}/hal/include/hal/watchdog_api.h
+ ${ARGV0}/hal/source/LowPowerTickerWrapper.cpp
+ ${ARGV0}/hal/source/mbed_compat.c
+ ${ARGV0}/hal/source/mbed_critical_section_api.c
+ ${ARGV0}/hal/source/mbed_flash_api.c
+ ${ARGV0}/hal/source/mbed_gpio.c
+ ${ARGV0}/hal/source/mbed_gpio_irq.c
+ ${ARGV0}/hal/source/mbed_itm_api.c
+ ${ARGV0}/hal/source/mbed_lp_ticker_api.c
+ ${ARGV0}/hal/source/mbed_lp_ticker_wrapper.cpp
+ ${ARGV0}/hal/source/mbed_pinmap_common.c
+ ${ARGV0}/hal/source/mbed_pinmap_default.cpp
+ ${ARGV0}/hal/source/mbed_ticker_api.c
+ ${ARGV0}/hal/source/mbed_us_ticker_api.c
+ ${ARGV0}/hal/source/mpu/mbed_mpu_v7m.c
+ ${ARGV0}/hal/source/mpu/mbed_mpu_v8m.c
+ ${ARGV0}/hal/source/static_pinmap.cpp
+ ${ARGV0}/hal/usb/include/usb/USBPhy.h
+ ${ARGV0}/hal/usb/include/usb/USBPhyEvents.h
+ ${ARGV0}/hal/usb/include/usb/USBPhyTypes.h
+ ${ARGV0}/hal/usb/include/usb/usb_phy_api.h
+ ${ARGV0}/hal/usb/source/mbed_usb_phy.cpp
+ ${ARGV0}/mbed.h
+ ${ARGV0}/platform/cxxsupport/mstd_algorithm
+ ${ARGV0}/platform/cxxsupport/mstd_atomic
+ ${ARGV0}/platform/cxxsupport/mstd_cstddef
+ ${ARGV0}/platform/cxxsupport/mstd_functional
+ ${ARGV0}/platform/cxxsupport/mstd_iterator
+ ${ARGV0}/platform/cxxsupport/mstd_memory
+ ${ARGV0}/platform/cxxsupport/mstd_mutex
+ ${ARGV0}/platform/cxxsupport/mstd_mutex.cpp
+ ${ARGV0}/platform/cxxsupport/mstd_new
+ ${ARGV0}/platform/cxxsupport/mstd_span
+ ${ARGV0}/platform/cxxsupport/mstd_tuple
+ ${ARGV0}/platform/cxxsupport/mstd_type_traits
+ ${ARGV0}/platform/cxxsupport/mstd_utility
+ ${ARGV0}/platform/include/platform/ATCmdParser.h
+ ${ARGV0}/platform/include/platform/CThunk.h
+ ${ARGV0}/platform/include/platform/Callback.h
+ ${ARGV0}/platform/include/platform/CircularBuffer.h
+ ${ARGV0}/platform/include/platform/CriticalSectionLock.h
+ ${ARGV0}/platform/include/platform/DeepSleepLock.h
+ ${ARGV0}/platform/include/platform/DirHandle.h
+ ${ARGV0}/platform/include/platform/FileBase.h
+ ${ARGV0}/platform/include/platform/FileHandle.h
+ ${ARGV0}/platform/include/platform/FileLike.h
+ ${ARGV0}/platform/include/platform/FilePath.h
+ ${ARGV0}/platform/include/platform/FileSystemHandle.h
+ ${ARGV0}/platform/include/platform/FileSystemLike.h
+ ${ARGV0}/platform/include/platform/LocalFileSystem.h
+ ${ARGV0}/platform/include/platform/NonCopyable.h
+ ${ARGV0}/platform/include/platform/PlatformMutex.h
+ ${ARGV0}/platform/include/platform/ScopedLock.h
+ ${ARGV0}/platform/include/platform/ScopedRamExecutionLock.h
+ ${ARGV0}/platform/include/platform/ScopedRomWriteLock.h
+ ${ARGV0}/platform/include/platform/SharedPtr.h
+ ${ARGV0}/platform/include/platform/SingletonPtr.h
+ ${ARGV0}/platform/include/platform/Span.h
+ ${ARGV0}/platform/include/platform/Stream.h
+ ${ARGV0}/platform/include/platform/Transaction.h
+ ${ARGV0}/platform/include/platform/internal/CThunkBase.h
+ ${ARGV0}/platform/include/platform/internal/SysTimer.h
+ ${ARGV0}/platform/include/platform/internal/mbed_atomic_impl.h
+ ${ARGV0}/platform/include/platform/internal/mbed_error_hist.h
+ ${ARGV0}/platform/include/platform/internal/mbed_fault_handler.h
+ ${ARGV0}/platform/include/platform/internal/mbed_os_timer.h
+ ${ARGV0}/platform/include/platform/mbed_application.h
+ ${ARGV0}/platform/include/platform/mbed_assert.h
+ ${ARGV0}/platform/include/platform/mbed_atomic.h
+ ${ARGV0}/platform/include/platform/mbed_chrono.h
+ ${ARGV0}/platform/include/platform/mbed_critical.h
+ ${ARGV0}/platform/include/platform/mbed_debug.h
+ ${ARGV0}/platform/include/platform/mbed_enum_flags.h
+ ${ARGV0}/platform/include/platform/mbed_error.h
+ ${ARGV0}/platform/include/platform/mbed_interface.h
+ ${ARGV0}/platform/include/platform/mbed_mem_trace.h
+ ${ARGV0}/platform/include/platform/mbed_mktime.h
+ ${ARGV0}/platform/include/platform/mbed_mpu_mgmt.h
+ ${ARGV0}/platform/include/platform/mbed_poll.h
+ ${ARGV0}/platform/include/platform/mbed_power_mgmt.h
+ ${ARGV0}/platform/include/platform/mbed_preprocessor.h
+ ${ARGV0}/platform/include/platform/mbed_retarget.h
+ ${ARGV0}/platform/include/platform/mbed_rtc_time.h
+ ${ARGV0}/platform/include/platform/mbed_semihost_api.h
+ ${ARGV0}/platform/include/platform/mbed_stats.h
+ ${ARGV0}/platform/include/platform/mbed_thread.h
+ ${ARGV0}/platform/include/platform/mbed_toolchain.h
+ ${ARGV0}/platform/include/platform/mbed_version.h
+ ${ARGV0}/platform/include/platform/mbed_wait_api.h
+ ${ARGV0}/platform/include/platform/platform.h
+ ${ARGV0}/platform/mbed-trace/include/mbed-trace/mbed_trace.h
+ ${ARGV0}/platform/mbed-trace/include/mbed-trace/ns_trace.h
+ ${ARGV0}/platform/mbed-trace/source/mbed_trace.c
+ ${ARGV0}/platform/randlib/include/mbed-client-randlib/platform/arm_hal_random.h
+ ${ARGV0}/platform/randlib/include/mbed-client-randlib/randLIB.h
+ ${ARGV0}/platform/randlib/source/randLIB.c
+ ${ARGV0}/platform/source/ATCmdParser.cpp
+ ${ARGV0}/platform/source/CThunkBase.cpp
+ ${ARGV0}/platform/source/CriticalSectionLock.cpp
+ ${ARGV0}/platform/source/DeepSleepLock.cpp
+ ${ARGV0}/platform/source/FileBase.cpp
+ ${ARGV0}/platform/source/FileHandle.cpp
+ ${ARGV0}/platform/source/FilePath.cpp
+ ${ARGV0}/platform/source/FileSystemHandle.cpp
+ ${ARGV0}/platform/source/LocalFileSystem.cpp
+ ${ARGV0}/platform/source/Stream.cpp
+ ${ARGV0}/platform/source/SysTimer.cpp
+ ${ARGV0}/platform/source/TARGET_CORTEX_M/TOOLCHAIN_GCC/except.S
+ ${ARGV0}/platform/source/TARGET_CORTEX_M/mbed_fault_handler.c
+ ${ARGV0}/platform/source/mbed_alloc_wrappers.cpp
+ ${ARGV0}/platform/source/mbed_application.c
+ ${ARGV0}/platform/source/mbed_assert.c
+ ${ARGV0}/platform/source/mbed_atomic_impl.c
+ ${ARGV0}/platform/source/mbed_board.c
+ ${ARGV0}/platform/source/mbed_crash_data_offsets.h
+ ${ARGV0}/platform/source/mbed_critical.c
+ ${ARGV0}/platform/source/mbed_error.c
+ ${ARGV0}/platform/source/mbed_error_hist.c
+ ${ARGV0}/platform/source/mbed_interface.c
+ ${ARGV0}/platform/source/mbed_mem_trace.cpp
+ ${ARGV0}/platform/source/mbed_mktime.c
+ ${ARGV0}/platform/source/mbed_mpu_mgmt.c
+ ${ARGV0}/platform/source/mbed_os_timer.cpp
+ ${ARGV0}/platform/source/mbed_poll.cpp
+ ${ARGV0}/platform/source/mbed_power_mgmt.c
+ ${ARGV0}/platform/source/mbed_retarget.cpp
+ ${ARGV0}/platform/source/mbed_rtc_time.cpp
+ ${ARGV0}/platform/source/mbed_sdk_boot.c
+ ${ARGV0}/platform/source/mbed_semihost_api.c
+ ${ARGV0}/platform/source/mbed_stats.c
+ ${ARGV0}/platform/source/mbed_thread.cpp
+ ${ARGV0}/platform/source/mbed_wait_api_no_rtos.c
+ ${ARGV0}/platform/source/minimal-printf/mbed_printf_armlink_overrides.c
+ ${ARGV0}/platform/source/minimal-printf/mbed_printf_implementation.c
+ ${ARGV0}/platform/source/minimal-printf/mbed_printf_implementation.h
+ ${ARGV0}/platform/source/minimal-printf/mbed_printf_wrapper.c
+ ${ARGV0}/platform/source/newlib_nano_malloc_workaround.c
+ ${ARGV0}/rtos/include/rtos/ConditionVariable.h
+ ${ARGV0}/rtos/include/rtos/EventFlags.h
+ ${ARGV0}/rtos/include/rtos/Kernel.h
+ ${ARGV0}/rtos/include/rtos/Mail.h
+ ${ARGV0}/rtos/include/rtos/MemoryPool.h
+ ${ARGV0}/rtos/include/rtos/Mutex.h
+ ${ARGV0}/rtos/include/rtos/Queue.h
+ ${ARGV0}/rtos/include/rtos/Semaphore.h
+ ${ARGV0}/rtos/include/rtos/ThisThread.h
+ ${ARGV0}/rtos/include/rtos/Thread.h
+ ${ARGV0}/rtos/include/rtos/internal/mbed_rtos1_types.h
+ ${ARGV0}/rtos/include/rtos/internal/mbed_rtos_storage.h
+ ${ARGV0}/rtos/include/rtos/mbed_rtos_types.h
+ ${ARGV0}/rtos/include/rtos/rtos.h
+ ${ARGV0}/rtos/source/ConditionVariable.cpp
+ ${ARGV0}/rtos/source/EventFlags.cpp
+ ${ARGV0}/rtos/source/Kernel.cpp
+ ${ARGV0}/rtos/source/Mutex.cpp
+ ${ARGV0}/rtos/source/Semaphore.cpp
+ ${ARGV0}/rtos/source/ThisThread.cpp
+ ${ARGV0}/rtos/source/Thread.cpp
+ ${ARGV0}/rtos/source/rtos_handlers.h
+ ${ARGV0}/rtos/source/rtos_idle.h
+ ${ARGV0}/storage/blockdevice/COMPONENT_FLASHIAP/include/FlashIAP/FlashIAPBlockDevice.h
+ ${ARGV0}/storage/blockdevice/COMPONENT_FLASHIAP/source/FlashIAPBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/include/blockdevice/BlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/BufferedBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/ChainingBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/ExhaustibleBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/FlashSimBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/HeapBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/MBRBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/ObservingBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/ProfilingBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/ReadOnlyBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/SlicingBlockDevice.h
+ ${ARGV0}/storage/blockdevice/include/blockdevice/internal/SFDP.h
+ ${ARGV0}/storage/blockdevice/source/BufferedBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/ChainingBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/ExhaustibleBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/FlashSimBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/HeapBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/MBRBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/ObservingBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/ProfilingBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/ReadOnlyBlockDevice.cpp
+ ${ARGV0}/storage/blockdevice/source/SFDP.cpp
+ ${ARGV0}/storage/blockdevice/source/SlicingBlockDevice.cpp
+ ${ARGV0}/storage/filesystem/fat/ChaN/diskio.h
+ ${ARGV0}/storage/filesystem/fat/ChaN/ff.cpp
+ ${ARGV0}/storage/filesystem/fat/ChaN/ff.h
+ ${ARGV0}/storage/filesystem/fat/ChaN/ffconf.h
+ ${ARGV0}/storage/filesystem/fat/ChaN/ffunicode.cpp
+ ${ARGV0}/storage/filesystem/fat/ChaN/integer.h
+ ${ARGV0}/storage/filesystem/fat/include/fat/FATFileSystem.h
+ ${ARGV0}/storage/filesystem/fat/source/FATFileSystem.cpp
+ ${ARGV0}/storage/filesystem/include/filesystem/Dir.h
+ ${ARGV0}/storage/filesystem/include/filesystem/File.h
+ ${ARGV0}/storage/filesystem/include/filesystem/FileSystem.h
+ ${ARGV0}/storage/filesystem/include/filesystem/mbed_filesystem.h
+ ${ARGV0}/storage/filesystem/littlefs/include/littlefs/LittleFileSystem.h
+ ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs.c
+ ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs.h
+ ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs_util.c
+ ${ARGV0}/storage/filesystem/littlefs/littlefs/lfs_util.h
+ ${ARGV0}/storage/filesystem/littlefs/source/LittleFileSystem.cpp
+ ${ARGV0}/storage/filesystem/littlefsv2/include/littlefsv2/LittleFileSystem2.h
+ ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2.c
+ ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2.h
+ ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2_util.c
+ ${ARGV0}/storage/filesystem/littlefsv2/littlefs/lfs2_util.h
+ ${ARGV0}/storage/filesystem/littlefsv2/source/LittleFileSystem2.cpp
+ ${ARGV0}/storage/filesystem/source/Dir.cpp
+ ${ARGV0}/storage/filesystem/source/File.cpp
+ ${ARGV0}/storage/filesystem/source/FileSystem.cpp
+ ${ARGV0}/storage/kvstore/direct_access_devicekey/include/direct_access_devicekey/DirectAccessDevicekey.h
+ ${ARGV0}/storage/kvstore/direct_access_devicekey/source/DirectAccessDevicekey.cpp
+ ${ARGV0}/storage/kvstore/filesystemstore/include/filesystemstore/FileSystemStore.h
+ ${ARGV0}/storage/kvstore/filesystemstore/source/FileSystemStore.cpp
+ ${ARGV0}/storage/kvstore/include/kvstore/KVStore.h
+ ${ARGV0}/storage/kvstore/kv_config/include/kv_config/kv_config.h
+ ${ARGV0}/storage/kvstore/kv_config/source/kv_config.cpp
+ ${ARGV0}/storage/kvstore/kvstore_global_api/include/kvstore_global_api/KVMap.h
+ ${ARGV0}/storage/kvstore/kvstore_global_api/include/kvstore_global_api/kvstore_global_api.h
+ ${ARGV0}/storage/kvstore/kvstore_global_api/source/KVMap.cpp
+ ${ARGV0}/storage/kvstore/kvstore_global_api/source/kvstore_global_api.cpp
+ ${ARGV0}/storage/kvstore/securestore/include/securestore/SecureStore.h
+ ${ARGV0}/storage/kvstore/securestore/source/SecureStore.cpp
+ ${ARGV0}/storage/kvstore/tdbstore/include/tdbstore/TDBStore.h
+ ${ARGV0}/storage/kvstore/tdbstore/source/TDBStore.cpp
+ ${ARGV0}/storage/platform/source/PlatformStorage.cpp
+ ${ARGV0}/targets/TARGET_STM/PeripheralPins.h
+ ${ARGV0}/targets/TARGET_STM/PinNamesTypes.h
+ ${ARGV0}/targets/TARGET_STM/PortNames.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/PeripheralNames.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h723xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h725xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h730xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h730xxq.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h733xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h735xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h742xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h743xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h745xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h747xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h750xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h753xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h755xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h757xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7a3xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7a3xxq.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b0xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b0xxq.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b3xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7b3xxq.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/stm32h7xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS/system_stm32h7xx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/Legacy/stm32_hal_legacy.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_adc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cec.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cec.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_comp.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_comp.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cordic.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cordic.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cortex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cortex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_crc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_cryp_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dac_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dcmi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dcmi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_def.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dfsdm_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma2d.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma2d.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dma_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dsi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dsi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dts.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_dts.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_eth_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_exti.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_exti.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fdcan.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fdcan.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_flash_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fmac.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_fmac.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gfxmmu.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gfxmmu.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gpio.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gpio.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_gpio_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hash_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hcd.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hcd.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hrtim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hrtim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hsem.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_hsem.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2c_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_i2s_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_irda.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_irda.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_irda_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_iwdg.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_iwdg.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_jpeg.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_jpeg.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_lptim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_lptim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ltdc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdios.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdios.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdma.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mdma.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_mmc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nand.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nand.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nor.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_nor.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_opamp_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ospi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ospi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_otfdec.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_otfdec.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pcd_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pssi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pssi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_pwr_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_qspi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_qspi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ramecc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_ramecc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rcc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rng_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_rtc_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sai_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sd_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sdram.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sdram.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smartcard_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smbus.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_smbus.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spdifrx.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spdifrx.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_spi_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sram.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_sram.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_swpmi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_swpmi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_tim_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_uart_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart_ex.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_usart_ex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_wwdg.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_hal_wwdg.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_adc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_adc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_bdma.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_bdma.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_bus.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_comp.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_comp.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_cordic.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_cordic.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_cortex.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crs.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_crs.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dac.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dac.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_delayblock.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_delayblock.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma2d.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dma2d.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_dmamux.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_exti.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_exti.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmac.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmac.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_fmc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_gpio.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_gpio.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_hrtim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_hrtim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_hsem.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_i2c.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_i2c.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_iwdg.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lptim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lptim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lpuart.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_lpuart.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_mdma.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_mdma.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_opamp.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_opamp.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_pwr.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_pwr.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rcc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rcc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rng.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rng.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rtc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_rtc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_sdmmc.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_sdmmc.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_spi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_spi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_swpmi.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_swpmi.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_system.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_tim.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_tim.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usart.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usart.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usb.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_usb.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_utils.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_utils.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/stm32h7xx_ll_wwdg.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/stm32h7xx_hal_conf.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/system_stm32h7xx_dualcore_boot_cm4_cm7.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/system_stm32h7xx_singlecore.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2/PeripheralPins.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2/PinNames.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2/system_clock.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/cmsis_nvic.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/analogin_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/analogout_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/cmsis.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/flash_api.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/gpio_irq_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/gpio_irq_device.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/i2c_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/i2c_device.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/objects.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/pin_device.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/pwmout_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/pwmout_device.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/serial_device.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/spi_api.c
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/spi_device.h
+ ${ARGV0}/targets/TARGET_STM/TARGET_STM32H7/us_ticker_data.h
+ ${ARGV0}/targets/TARGET_STM/USBPhyHw.h
+ ${ARGV0}/targets/TARGET_STM/USBPhy_STM32.cpp
+ ${ARGV0}/targets/TARGET_STM/analogin_api.c
+ ${ARGV0}/targets/TARGET_STM/analogout_api.c
+ ${ARGV0}/targets/TARGET_STM/can_api.c
+ ${ARGV0}/targets/TARGET_STM/device.h
+ ${ARGV0}/targets/TARGET_STM/gpio_api.c
+ ${ARGV0}/targets/TARGET_STM/gpio_irq_api.c
+ ${ARGV0}/targets/TARGET_STM/gpio_object.h
+ ${ARGV0}/targets/TARGET_STM/hal_tick_overrides.c
+ ${ARGV0}/targets/TARGET_STM/i2c_api.c
+ ${ARGV0}/targets/TARGET_STM/lp_ticker.c
+ ${ARGV0}/targets/TARGET_STM/lp_ticker_defines.h
+ ${ARGV0}/targets/TARGET_STM/mbed_crc_api.c
+ ${ARGV0}/targets/TARGET_STM/mbed_overrides.c
+ ${ARGV0}/targets/TARGET_STM/mbed_rtx.h
+ ${ARGV0}/targets/TARGET_STM/nvic_addr.h
+ ${ARGV0}/targets/TARGET_STM/ospi_api.c
+ ${ARGV0}/targets/TARGET_STM/pinmap.c
+ ${ARGV0}/targets/TARGET_STM/port_api.c
+ ${ARGV0}/targets/TARGET_STM/pwmout_api.c
+ ${ARGV0}/targets/TARGET_STM/qspi_api.c
+ ${ARGV0}/targets/TARGET_STM/reset_reason.c
+ ${ARGV0}/targets/TARGET_STM/rtc_api.c
+ ${ARGV0}/targets/TARGET_STM/rtc_api_hal.h
+ ${ARGV0}/targets/TARGET_STM/serial_api.c
+ ${ARGV0}/targets/TARGET_STM/serial_api_hal.h
+ ${ARGV0}/targets/TARGET_STM/sleep.c
+ ${ARGV0}/targets/TARGET_STM/stm32_assert.h
+ ${ARGV0}/targets/TARGET_STM/stm_spi_api.c
+ ${ARGV0}/targets/TARGET_STM/trng_api.c
+ ${ARGV0}/targets/TARGET_STM/us_ticker.c
+ ${ARGV0}/targets/TARGET_STM/us_ticker_defines.h
+ ${ARGV0}/targets/TARGET_STM/watchdog_api.c
+ mbed_config.h
+ )
+endmacro()
+
+macro(target_include_directories_mbed) # usage: target_include_directories_mbed(<target> <prefix-dir>)
+ target_include_directories(${ARGV0} PRIVATE # ARGV0 = target receiving the directories; ARGV1 = prefix prepended to every entry (presumably the mbed-os checkout root - TODO confirm)
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI/TARGET_NUCLEO_H743ZI2 # targets/: hard-coded to the STM32H7 / NUCLEO_H743ZI2 directories
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/TARGET_STM32H743xI # entries are listed in descending lexical order, so nested directories precede their parents
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver/Legacy
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/STM32H7xx_HAL_Driver
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW/CMSIS
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7/STM32Cube_FW
+ ${ARGV1}/targets/TARGET_STM/TARGET_STM32H7
+ ${ARGV1}/targets/TARGET_STM
+ ${ARGV1}/storage/kvstore/tdbstore/include/tdbstore # storage/ subtree starts here
+ ${ARGV1}/storage/kvstore/tdbstore/include
+ ${ARGV1}/storage/kvstore/tdbstore
+ ${ARGV1}/storage/kvstore/securestore/include/securestore
+ ${ARGV1}/storage/kvstore/securestore/include
+ ${ARGV1}/storage/kvstore/securestore
+ ${ARGV1}/storage/kvstore/kvstore_global_api/include/kvstore_global_api
+ ${ARGV1}/storage/kvstore/kvstore_global_api/include
+ ${ARGV1}/storage/kvstore/kvstore_global_api
+ ${ARGV1}/storage/kvstore/kv_config/include/kv_config
+ ${ARGV1}/storage/kvstore/kv_config/include
+ ${ARGV1}/storage/kvstore/kv_config
+ ${ARGV1}/storage/kvstore/include/kvstore
+ ${ARGV1}/storage/kvstore/include
+ ${ARGV1}/storage/kvstore/filesystemstore/include/filesystemstore
+ ${ARGV1}/storage/kvstore/filesystemstore/include
+ ${ARGV1}/storage/kvstore/filesystemstore
+ ${ARGV1}/storage/kvstore/direct_access_devicekey/include/direct_access_devicekey
+ ${ARGV1}/storage/kvstore/direct_access_devicekey/include
+ ${ARGV1}/storage/kvstore/direct_access_devicekey
+ ${ARGV1}/storage/kvstore
+ ${ARGV1}/storage/filesystem/littlefsv2/littlefs
+ ${ARGV1}/storage/filesystem/littlefsv2/include/littlefsv2
+ ${ARGV1}/storage/filesystem/littlefsv2/include
+ ${ARGV1}/storage/filesystem/littlefsv2
+ ${ARGV1}/storage/filesystem/littlefs/littlefs
+ ${ARGV1}/storage/filesystem/littlefs/include/littlefs
+ ${ARGV1}/storage/filesystem/littlefs/include
+ ${ARGV1}/storage/filesystem/littlefs
+ ${ARGV1}/storage/filesystem/include/filesystem
+ ${ARGV1}/storage/filesystem/include
+ ${ARGV1}/storage/filesystem/fat/include/fat
+ ${ARGV1}/storage/filesystem/fat/include
+ ${ARGV1}/storage/filesystem/fat/ChaN
+ ${ARGV1}/storage/filesystem/fat
+ ${ARGV1}/storage/filesystem
+ ${ARGV1}/storage/blockdevice/include/blockdevice/internal
+ ${ARGV1}/storage/blockdevice/include/blockdevice
+ ${ARGV1}/storage/blockdevice/include
+ ${ARGV1}/storage/blockdevice/COMPONENT_FLASHIAP/include/FlashIAP
+ ${ARGV1}/storage/blockdevice/COMPONENT_FLASHIAP/include
+ ${ARGV1}/storage/blockdevice/COMPONENT_FLASHIAP
+ ${ARGV1}/storage/blockdevice
+ ${ARGV1}/storage
+ ${ARGV1}/rtos/source # rtos/ subtree starts here
+ ${ARGV1}/rtos/include/rtos/internal
+ ${ARGV1}/rtos/include/rtos
+ ${ARGV1}/rtos/include
+ ${ARGV1}/rtos
+ ${ARGV1}/platform/source/minimal-printf # platform/ subtree starts here
+ ${ARGV1}/platform/source
+ ${ARGV1}/platform/randlib/include/mbed-client-randlib/platform
+ ${ARGV1}/platform/randlib/include/mbed-client-randlib
+ ${ARGV1}/platform/randlib/include
+ ${ARGV1}/platform/randlib
+ ${ARGV1}/platform/mbed-trace/include/mbed-trace
+ ${ARGV1}/platform/mbed-trace/include
+ ${ARGV1}/platform/mbed-trace
+ ${ARGV1}/platform/include/platform/internal
+ ${ARGV1}/platform/include/platform
+ ${ARGV1}/platform/include
+ ${ARGV1}/platform/cxxsupport
+ ${ARGV1}/platform
+ ${ARGV1}/hal/usb/include/usb # hal/ subtree starts here
+ ${ARGV1}/hal/usb/include
+ ${ARGV1}/hal/usb
+ ${ARGV1}/hal/include/hal
+ ${ARGV1}/hal/include
+ ${ARGV1}/hal
+ ${ARGV1}/features/frameworks/utest/utest # features/ subtree starts here
+ ${ARGV1}/features/frameworks/utest
+ ${ARGV1}/features/frameworks/unity/unity
+ ${ARGV1}/features/frameworks/unity
+ ${ARGV1}/features/frameworks/mbed-client-cli/mbed-client-cli
+ ${ARGV1}/features/frameworks/mbed-client-cli
+ ${ARGV1}/features/frameworks/greentea-client/greentea-client
+ ${ARGV1}/features/frameworks/greentea-client
+ ${ARGV1}/features/frameworks
+ ${ARGV1}/features
+ ${ARGV1}/events/include/events/internal # events/ subtree starts here
+ ${ARGV1}/events/include/events
+ ${ARGV1}/events/include
+ ${ARGV1}/events
+ ${ARGV1}/drivers/usb/include/usb/internal # drivers/ subtree starts here
+ ${ARGV1}/drivers/usb/include/usb
+ ${ARGV1}/drivers/usb/include
+ ${ARGV1}/drivers/usb
+ ${ARGV1}/drivers/include/drivers/interfaces
+ ${ARGV1}/drivers/include/drivers
+ ${ARGV1}/drivers/include
+ ${ARGV1}/drivers/device_key/include/device_key
+ ${ARGV1}/drivers/device_key/include
+ ${ARGV1}/drivers/device_key
+ ${ARGV1}/drivers
+ ${ARGV1}/connectivity/nfc/libraries/stack/transceiver # connectivity/ subtree starts here
+ ${ARGV1}/connectivity/nfc/libraries/stack/tech/type4
+ ${ARGV1}/connectivity/nfc/libraries/stack/tech/isodep
+ ${ARGV1}/connectivity/nfc/libraries/stack/tech/iso7816
+ ${ARGV1}/connectivity/nfc/libraries/stack/tech
+ ${ARGV1}/connectivity/nfc/libraries/stack/platform
+ ${ARGV1}/connectivity/nfc/libraries/stack/ndef
+ ${ARGV1}/connectivity/nfc/libraries/stack
+ ${ARGV1}/connectivity/nfc/libraries/acore/acore
+ ${ARGV1}/connectivity/nfc/libraries/acore
+ ${ARGV1}/connectivity/nfc/libraries
+ ${ARGV1}/connectivity/nfc/include/nfc/ndef/common
+ ${ARGV1}/connectivity/nfc/include/nfc/ndef
+ ${ARGV1}/connectivity/nfc/include/nfc
+ ${ARGV1}/connectivity/nfc/include
+ ${ARGV1}/connectivity/nfc
+ ${ARGV1}/connectivity/netsocket/include/netsocket
+ ${ARGV1}/connectivity/netsocket/include
+ ${ARGV1}/connectivity/netsocket
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/libNET/src
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/libNET
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/libDHCPv6
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/ipv6_stack
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/configs/base
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/configs
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/whiteboard
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/utils
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/random_early_detection
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/pan_blacklist
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/nist_aes_kw
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/nd_proxy
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mle_service
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/stack
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/serial
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/poll
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/mdns
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services/dns
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/services
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/port/cpu
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/port/compiler
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack/port
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet/fnet_stack
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns/fnet
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mdns
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/mac_neighbor_table
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/load_balance
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/ieee_802_11
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/hmac
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/fnv_hash
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/fhss
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/etx
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/blacklist
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/Trickle
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs/Neighbor_cache
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Service_Libs
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/tls_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/radius_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/msg_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/key_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/gkh_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/fwh_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols/eap_tls_sec_prot
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/protocols
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/kmp
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/eapol
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/TLS
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/PANA
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security/Common
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Security
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/RPL
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/NWK_INTERFACE/Include
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/NWK_INTERFACE
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MPL
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MLE
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MAC/virtual_rf
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MAC/IEEE802_15_4
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/MAC
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/DHCPv6_client
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/DHCPv6_Server
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Core/include
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Core
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/Common_Protocols
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/BorderRouter
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/ws
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Thread
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/NVM
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/ND
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Mesh
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/MAC
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/IPHC_Decode
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Fragmentation
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN/Bootstraps
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source/6LoWPAN
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/source
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/nanostack/platform
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack/nanostack
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop/source
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop/nanostack-event-loop/platform
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop/nanostack-event-loop
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack-eventloop
+ ${ARGV1}/connectivity/nanostack/sal-stack-nanostack
+ ${ARGV1}/connectivity/nanostack/nanostack-hal-mbed-cmsis-rtos
+ ${ARGV1}/connectivity/nanostack/mbed-mesh-api/source/include
+ ${ARGV1}/connectivity/nanostack/mbed-mesh-api/source
+ ${ARGV1}/connectivity/nanostack/mbed-mesh-api/mbed-mesh-api
+ ${ARGV1}/connectivity/nanostack/mbed-mesh-api
+ ${ARGV1}/connectivity/nanostack/include/nanostack-interface
+ ${ARGV1}/connectivity/nanostack/include
+ ${ARGV1}/connectivity/nanostack/coap-service/source/include
+ ${ARGV1}/connectivity/nanostack/coap-service/source
+ ${ARGV1}/connectivity/nanostack/coap-service/coap-service
+ ${ARGV1}/connectivity/nanostack/coap-service
+ ${ARGV1}/connectivity/nanostack
+ ${ARGV1}/connectivity/mbedtls/source
+ ${ARGV1}/connectivity/mbedtls/platform/inc
+ ${ARGV1}/connectivity/mbedtls/platform
+ ${ARGV1}/connectivity/mbedtls/include/mbedtls
+ ${ARGV1}/connectivity/mbedtls/include
+ ${ARGV1}/connectivity/mbedtls
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/netif
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/lwip/prot
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/lwip/priv
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/lwip
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix/sys
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix/net
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix/arpa
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat/posix
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include/compat
+ ${ARGV1}/connectivity/lwipstack/lwip/src/include
+ ${ARGV1}/connectivity/lwipstack/lwip/src
+ ${ARGV1}/connectivity/lwipstack/lwip-sys/arch
+ ${ARGV1}/connectivity/lwipstack/lwip-sys
+ ${ARGV1}/connectivity/lwipstack/lwip
+ ${ARGV1}/connectivity/lwipstack/include/lwipstack
+ ${ARGV1}/connectivity/lwipstack/include
+ ${ARGV1}/connectivity/lwipstack
+ ${ARGV1}/connectivity/lorawan/system
+ ${ARGV1}/connectivity/lorawan/lorastack/phy
+ ${ARGV1}/connectivity/lorawan/lorastack/mac
+ ${ARGV1}/connectivity/lorawan/lorastack
+ ${ARGV1}/connectivity/lorawan/include/lorawan
+ ${ARGV1}/connectivity/lorawan/include
+ ${ARGV1}/connectivity/lorawan
+ ${ARGV1}/connectivity/libraries/ppp/include/ppp
+ ${ARGV1}/connectivity/libraries/ppp/include/polarssl
+ ${ARGV1}/connectivity/libraries/ppp/include
+ ${ARGV1}/connectivity/libraries/ppp
+ ${ARGV1}/connectivity/libraries/nanostack-libservice/mbed-client-libservice/platform
+ ${ARGV1}/connectivity/libraries/nanostack-libservice/mbed-client-libservice
+ ${ARGV1}/connectivity/libraries/nanostack-libservice
+ ${ARGV1}/connectivity/libraries/mbed-coap/source/include
+ ${ARGV1}/connectivity/libraries/mbed-coap/source
+ ${ARGV1}/connectivity/libraries/mbed-coap/mbed-coap
+ ${ARGV1}/connectivity/libraries/mbed-coap
+ ${ARGV1}/connectivity/libraries
+ ${ARGV1}/connectivity/drivers/wifi/esp8266-driver/ESP8266
+ ${ARGV1}/connectivity/drivers/wifi/esp8266-driver
+ ${ARGV1}/connectivity/drivers/wifi
+ ${ARGV1}/connectivity/drivers/nfc/PN512/source/transceiver
+ ${ARGV1}/connectivity/drivers/nfc/PN512/source
+ ${ARGV1}/connectivity/drivers/nfc/PN512/include/nfc/controllers
+ ${ARGV1}/connectivity/drivers/nfc/PN512/include/nfc
+ ${ARGV1}/connectivity/drivers/nfc/PN512/include
+ ${ARGV1}/connectivity/drivers/nfc/PN512
+ ${ARGV1}/connectivity/drivers/nfc
+ ${ARGV1}/connectivity/drivers/mbedtls/TARGET_STM
+ ${ARGV1}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7/lan8742
+ ${ARGV1}/connectivity/drivers/emac/TARGET_STM/TARGET_STM32H7
+ ${ARGV1}/connectivity/drivers/emac/TARGET_STM
+ ${ARGV1}/connectivity/drivers/cellular/UBLOX/PPP
+ ${ARGV1}/connectivity/drivers/cellular/UBLOX/N2XX
+ ${ARGV1}/connectivity/drivers/cellular/UBLOX/AT
+ ${ARGV1}/connectivity/drivers/cellular/UBLOX
+ ${ARGV1}/connectivity/drivers/cellular/TELIT/ME910
+ ${ARGV1}/connectivity/drivers/cellular/TELIT/ME310
+ ${ARGV1}/connectivity/drivers/cellular/TELIT/HE910
+ ${ARGV1}/connectivity/drivers/cellular/TELIT
+ ${ARGV1}/connectivity/drivers/cellular/RiotMicro/AT
+ ${ARGV1}/connectivity/drivers/cellular/RiotMicro
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL/UG96
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL/M26
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL/EC2X
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL/BG96
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL/BC95
+ ${ARGV1}/connectivity/drivers/cellular/QUECTEL
+ ${ARGV1}/connectivity/drivers/cellular/MultiTech/DragonflyNano/PPP
+ ${ARGV1}/connectivity/drivers/cellular/MultiTech/DragonflyNano
+ ${ARGV1}/connectivity/drivers/cellular/MultiTech
+ ${ARGV1}/connectivity/drivers/cellular/GENERIC/GENERIC_AT3GPP
+ ${ARGV1}/connectivity/drivers/cellular/GENERIC
+ ${ARGV1}/connectivity/drivers/cellular/GEMALTO/CINTERION
+ ${ARGV1}/connectivity/drivers/cellular/GEMALTO
+ ${ARGV1}/connectivity/drivers/cellular/Altair/ALT1250/PPP
+ ${ARGV1}/connectivity/drivers/cellular/Altair/ALT1250
+ ${ARGV1}/connectivity/drivers/cellular/Altair
+ ${ARGV1}/connectivity/drivers/cellular
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/stm-s2lp-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver/source
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/stm-s2lp-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/source
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver/mcr20a-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/mcr20a-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/source
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/atmel-rf-driver/atmel-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF/atmel-rf-driver
+ ${ARGV1}/connectivity/drivers/802.15.4_RF
+ ${ARGV1}/connectivity/drivers
+ ${ARGV1}/connectivity/cellular/include/cellular/framework/device
+ ${ARGV1}/connectivity/cellular/include/cellular/framework/common
+ ${ARGV1}/connectivity/cellular/include/cellular/framework/AT
+ ${ARGV1}/connectivity/cellular/include/cellular/framework/API
+ ${ARGV1}/connectivity/cellular/include/cellular/framework
+ ${ARGV1}/connectivity/cellular/include/cellular
+ ${ARGV1}/connectivity/cellular/include
+ ${ARGV1}/connectivity/cellular
+ ${ARGV1}/connectivity
+ ${ARGV1}/cmsis/device/rtos/include # cmsis/ subtree starts here
+ ${ARGV1}/cmsis/device/rtos
+ ${ARGV1}/cmsis/device/RTE/include
+ ${ARGV1}/cmsis/device/RTE
+ ${ARGV1}/cmsis/device
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M/Include
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/TARGET_CORTEX_M
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Source
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include1
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Include
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX/Config
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/RTX
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2/Include
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS/RTOS2
+ ${ARGV1}/cmsis/CMSIS_5/CMSIS
+ ${ARGV1}/cmsis/CMSIS_5
+ ${ARGV1}/cmsis
+ ${ARGV1} # finally, the prefix directory itself
+ ) # NOTE(review): no argument-count check here; callers presumably always pass both arguments - confirm
+
+endmacro()
--- /dev/null
+/*
+ * mbed SDK
+ * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Automatically generated configuration file.
+// DO NOT EDIT, content will be overwritten.
+
+#ifndef __MBED_CONFIG_DATA__
+#define __MBED_CONFIG_DATA__
+
+// Configuration parameters
+// Parenthesized so the OR-ed flag set expands as a single operand wherever CLOCK_SOURCE is used.
+#define CLOCK_SOURCE (USE_PLL_HSE_EXTC | USE_PLL_HSI) // set by target:MCU_STM32H7
+#define HSE_VALUE 8000000 // set by target:NUCLEO_H743ZI2
+#define LPTICKER_DELAY_TICKS 0 // set by target:MCU_STM32H7
+#define MBED_CONF_ALT1250_PPP_BAUDRATE 115200 // set by library:ALT1250_PPP
+#define MBED_CONF_ALT1250_PPP_PROVIDE_DEFAULT 0 // set by library:ALT1250_PPP
+#define MBED_CONF_ATMEL_RF_ASSUME_SPACED_SPI 1 // set by library:atmel-rf[STM]
+#define MBED_CONF_ATMEL_RF_FULL_SPI_SPEED 7500000 // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_FULL_SPI_SPEED_BYTE_SPACING 250 // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_IRQ_THREAD_STACK_SIZE 1024 // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_LOW_SPI_SPEED 3750000 // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_PROVIDE_DEFAULT 0 // set by library:atmel-rf
+#define MBED_CONF_ATMEL_RF_USE_SPI_SPACING_API 0 // set by library:atmel-rf
+#define MBED_CONF_CELLULAR_AT_HANDLER_BUFFER_SIZE 32 // set by library:cellular
+#define MBED_CONF_CELLULAR_CONTROL_PLANE_OPT 0 // set by library:cellular
+#define MBED_CONF_CELLULAR_DEBUG_AT 0 // set by library:cellular
+#define MBED_CONF_CELLULAR_MAX_CP_DATA_RECV_LEN 1358 // set by library:cellular
+#define MBED_CONF_CELLULAR_PRESENT 1 // set by library:cellular
+#define MBED_CONF_CELLULAR_RANDOM_MAX_START_DELAY 0 // set by library:cellular
+#define MBED_CONF_CELLULAR_USE_APN_LOOKUP 0 // set by library:cellular
+#define MBED_CONF_CELLULAR_USE_SMS 0 // set by library:cellular
+#define MBED_CONF_DRIVERS_OSPI_CSN OSPI_FLASH1_CSN // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_DQS OSPI_FLASH1_DQS // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO0 OSPI_FLASH1_IO0 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO1 OSPI_FLASH1_IO1 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO2 OSPI_FLASH1_IO2 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO3 OSPI_FLASH1_IO3 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO4 OSPI_FLASH1_IO4 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO5 OSPI_FLASH1_IO5 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO6 OSPI_FLASH1_IO6 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_IO7 OSPI_FLASH1_IO7 // set by library:drivers
+#define MBED_CONF_DRIVERS_OSPI_SCK OSPI_FLASH1_SCK // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_CSN QSPI_FLASH1_CSN // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO0 QSPI_FLASH1_IO0 // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO1 QSPI_FLASH1_IO1 // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO2 QSPI_FLASH1_IO2 // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_IO3 QSPI_FLASH1_IO3 // set by library:drivers
+#define MBED_CONF_DRIVERS_QSPI_SCK QSPI_FLASH1_SCK // set by library:drivers
+#define MBED_CONF_DRIVERS_UART_SERIAL_RXBUF_SIZE 256 // set by library:drivers
+#define MBED_CONF_DRIVERS_UART_SERIAL_TXBUF_SIZE 256 // set by library:drivers
+#define MBED_CONF_ESP8266_BUILT_IN_DNS 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_DEBUG 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_POWER_OFF_TIME_MS 3 // set by library:esp8266
+#define MBED_CONF_ESP8266_POWER_ON_POLARITY 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_POWER_ON_TIME_MS 3 // set by library:esp8266
+#define MBED_CONF_ESP8266_PROVIDE_DEFAULT 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_SERIAL_BAUDRATE 115200 // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_ENABLE 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_SERVER0 "" // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_SERVER1 "" // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_SERVER2 "" // set by library:esp8266
+#define MBED_CONF_ESP8266_SNTP_TIMEZONE 0 // set by library:esp8266
+#define MBED_CONF_ESP8266_SOCKET_BUFSIZE 8192 // set by library:esp8266
+#define MBED_CONF_EVENTS_PRESENT 1 // set by library:events
+#define MBED_CONF_EVENTS_SHARED_DISPATCH_FROM_APPLICATION 0 // set by library:events
+#define MBED_CONF_EVENTS_SHARED_EVENTSIZE 768 // set by library:events
+#define MBED_CONF_EVENTS_SHARED_HIGHPRIO_EVENTSIZE 256 // set by library:events
+#define MBED_CONF_EVENTS_SHARED_HIGHPRIO_STACKSIZE 1024 // set by library:events
+#define MBED_CONF_EVENTS_SHARED_STACKSIZE 2048 // set by library:events
+#define MBED_CONF_EVENTS_USE_LOWPOWER_TIMER_TICKER 0 // set by library:events
+#define MBED_CONF_FAT_CHAN_FFS_DBG 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_CODE_PAGE 437 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_EXFAT 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_HEAPBUF 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_LOCK 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_MINIMIZE 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_NOFSINFO 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_NORTC 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_READONLY 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_REENTRANT 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_RPATH 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_TIMEOUT 1000 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_FS_TINY 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_LFN_BUF 255 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_LFN_UNICODE 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MAX_LFN 255 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MAX_SS 4096 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MIN_SS 512 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_MULTI_PARTITION 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_NORTC_MDAY 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_NORTC_MON 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_NORTC_YEAR 2017 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_PRINT_FLOAT 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_PRINT_LLI 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_SFN_BUF 12 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_STRF_ENCODE 3 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_STR_VOLUME_ID 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_SYNC_T HANDLE // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_CHMOD 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_EXPAND 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_FASTSEEK 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_FIND 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_FORWARD 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_LABEL 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_LFN 3 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_MKFS 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_STRFUNC 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_USE_TRIM 1 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_VOLUMES 4 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FF_VOLUME_STRS \
+ "RAM", "NAND", "CF", "SD", "SD2", "USB", "USB2", "USB3" // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FLUSH_ON_NEW_CLUSTER 0 // set by library:fat_chan
+#define MBED_CONF_FAT_CHAN_FLUSH_ON_NEW_SECTOR 1 // set by library:fat_chan
+#define MBED_CONF_FILESYSTEM_PRESENT 1 // set by library:filesystem
+#define MBED_CONF_FLASHIAP_BLOCK_DEVICE_BASE_ADDRESS \
+ 0xFFFFFFFF // set by library:flashiap-block-device
+#define MBED_CONF_FLASHIAP_BLOCK_DEVICE_SIZE 0 // set by library:flashiap-block-device
+#define MBED_CONF_GEMALTO_CINTERION_BAUDRATE 115200 // set by library:GEMALTO_CINTERION
+#define MBED_CONF_GEMALTO_CINTERION_PROVIDE_DEFAULT 0 // set by library:GEMALTO_CINTERION
+#define MBED_CONF_GENERIC_AT3GPP_BAUDRATE 115200 // set by library:GENERIC_AT3GPP
+#define MBED_CONF_GENERIC_AT3GPP_PROVIDE_DEFAULT 0 // set by library:GENERIC_AT3GPP
+#define MBED_CONF_LORA_ADR_ON 1 // set by library:lora
+#define MBED_CONF_LORA_APPLICATION_EUI \
+ { \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+ } // set by library:lora
+#define MBED_CONF_LORA_APPLICATION_KEY \
+ { \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+ } // set by library:lora
+#define MBED_CONF_LORA_APPSKEY \
+ { \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+ } // set by library:lora
+#define MBED_CONF_LORA_APP_PORT 15 // set by library:lora
+#define MBED_CONF_LORA_AUTOMATIC_UPLINK_MESSAGE 1 // set by library:lora
+#define MBED_CONF_LORA_DEVICE_ADDRESS 0x00000000 // set by library:lora
+#define MBED_CONF_LORA_DEVICE_EUI \
+ { \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+ } // set by library:lora
+#define MBED_CONF_LORA_DOWNLINK_PREAMBLE_LENGTH 5 // set by library:lora
+#define MBED_CONF_LORA_DUTY_CYCLE_ON 1 // set by library:lora
+#define MBED_CONF_LORA_DUTY_CYCLE_ON_JOIN 1 // set by library:lora
+#define MBED_CONF_LORA_FSB_MASK \
+ { \
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x00FF \
+ } // set by library:lora
+#define MBED_CONF_LORA_FSB_MASK_CHINA \
+ { \
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF \
+ } // set by library:lora
+#define MBED_CONF_LORA_LBT_ON 0 // set by library:lora
+#define MBED_CONF_LORA_MAX_SYS_RX_ERROR 5 // set by library:lora
+#define MBED_CONF_LORA_NB_TRIALS 12 // set by library:lora
+#define MBED_CONF_LORA_NWKSKEY \
+ { \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 \
+ } // set by library:lora
+#define MBED_CONF_LORA_OVER_THE_AIR_ACTIVATION 1 // set by library:lora
+#define MBED_CONF_LORA_PHY EU868 // set by library:lora
+#define MBED_CONF_LORA_PUBLIC_NETWORK 1 // set by library:lora
+#define MBED_CONF_LORA_TX_MAX_SIZE 64 // set by library:lora
+#define MBED_CONF_LORA_UPLINK_PREAMBLE_LENGTH 8 // set by library:lora
+#define MBED_CONF_LORA_WAKEUP_TIME 5 // set by library:lora
+#define MBED_CONF_LWIP_ADDR_TIMEOUT 5 // set by library:lwip
+#define MBED_CONF_LWIP_ADDR_TIMEOUT_MODE 1 // set by library:lwip
+#define MBED_CONF_LWIP_DEBUG_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_DEFAULT_THREAD_STACKSIZE 512 // set by library:lwip
+#define MBED_CONF_LWIP_DHCP_TIMEOUT 60 // set by library:lwip
+#define MBED_CONF_LWIP_ENABLE_PPP_TRACE 0 // set by library:lwip
+#define MBED_CONF_LWIP_ETHERNET_ENABLED 1 // set by library:lwip
+#define MBED_CONF_LWIP_IPV4_ENABLED 1 // set by library:lwip
+#define MBED_CONF_LWIP_IPV6_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_IP_VER_PREF 4 // set by library:lwip
+#define MBED_CONF_LWIP_L3IP_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_MBOX_SIZE 8 // set by library:lwip
+#define MBED_CONF_LWIP_MEMP_NUM_TCPIP_MSG_INPKT 8 // set by library:lwip
+#define MBED_CONF_LWIP_MEMP_NUM_TCP_SEG 16 // set by library:lwip
+#define MBED_CONF_LWIP_MEM_SIZE 2310 // set by library:lwip[STM]
+#define MBED_CONF_LWIP_ND6_QUEUEING 0 // set by library:lwip
+#define MBED_CONF_LWIP_ND6_RDNSS_MAX_DNS_SERVERS 0 // set by library:lwip
+#define MBED_CONF_LWIP_NUM_NETBUF 8 // set by library:lwip
+#define MBED_CONF_LWIP_NUM_PBUF 8 // set by library:lwip
+#define MBED_CONF_LWIP_PBUF_POOL_SIZE 5 // set by library:lwip
+#define MBED_CONF_LWIP_PPP_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_PPP_IPV4_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_PPP_IPV6_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_PPP_THREAD_STACKSIZE 768 // set by library:lwip
+#define MBED_CONF_LWIP_PRESENT 1 // set by library:lwip
+#define MBED_CONF_LWIP_RAW_SOCKET_ENABLED 0 // set by library:lwip
+#define MBED_CONF_LWIP_SOCKET_MAX 4 // set by library:lwip
+#define MBED_CONF_LWIP_TCPIP_THREAD_PRIORITY osPriorityNormal // set by library:lwip
+#define MBED_CONF_LWIP_TCPIP_THREAD_STACKSIZE 1200 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_CLOSE_TIMEOUT 1000 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_ENABLED 1 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_MAXRTX 6 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_MSS 536 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SERVER_MAX 4 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SND_BUF (2 * TCP_MSS) // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SOCKET_MAX 4 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_SYNMAXRTX 6 // set by library:lwip
+#define MBED_CONF_LWIP_TCP_WND (4 * TCP_MSS) // set by library:lwip
+#define MBED_CONF_LWIP_UDP_SOCKET_MAX 4 // set by library:lwip
+#define MBED_CONF_LWIP_USE_MBED_TRACE 0 // set by library:lwip
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_CHANNEL 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_CHANNEL_MASK 0x7fff800 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_CHANNEL_PAGE 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_DEVICE_TYPE \
+ NET_6LOWPAN_ROUTER // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_PANID_FILTER 0xffff // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_PSK_KEY \
+ { \
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf \
+ } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_PSK_KEY_ID 1 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_SECURITY_MODE NONE // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_6LOWPAN_ND_SEC_LEVEL 5 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_HEAP_SIZE 32500 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_HEAP_STAT_INFO NULL // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_MAC_NEIGH_TABLE_SIZE 32 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_RADIUS_RETRY_COUNT 3 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_RADIUS_RETRY_IMAX 30 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_RADIUS_RETRY_IMIN 20 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_SYSTEM_TIME_UPDATE_FROM_NANOSTACK 1 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_CHANNEL 22 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_CHANNEL_MASK 0x7fff800 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_CHANNEL_PAGE 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_COMMISSIONING_DATASET_TIMESTAMP \
+ 0x10000 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_EXTENDED_PANID \
+ { \
+ 0xf1, 0xb5, 0xa1, 0xb2, 0xc4, 0xd5, 0xa1, 0xbd \
+ } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_ML_PREFIX \
+ { \
+ 0xfd, 0x0, 0x0d, 0xb8, 0x0, 0x0, 0x0, 0x0 \
+ } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_NETWORK_NAME \
+ "Thread Network" // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_PANID 0x0700 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_CONFIG_PSKC \
+ { \
+ 0xc8, 0xa6, 0x2e, 0xae, 0xf3, 0x68, 0xf3, 0x46, 0xa9, 0x9e, 0x57, 0x85, 0x98, 0x9d, 0x1c, 0xd0 \
+ } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_DEVICE_TYPE \
+ MESH_DEVICE_TYPE_THREAD_ROUTER // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_MASTER_KEY \
+ { \
+ 0x10, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff \
+ } // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_PSKD "ABCDEFGH" // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_SECURITY_POLICY 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_THREAD_USE_STATIC_LINK_CONFIG 1 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_USE_MALLOC_FOR_HEAP 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_CHANNEL_FUNCTION 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_DWELL_INTERVAL 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_FIXED_CHANNEL 65535 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_BC_INTERVAL 0 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_CHANNEL_PLAN_ID 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_DEVICE_TYPE \
+ MESH_DEVICE_TYPE_WISUN_ROUTER // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_NETWORK_NAME "Wi-SUN Network" // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_OPERATING_CLASS 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_OPERATING_MODE 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_PHY_MODE_ID 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_REGULATORY_DOMAIN 3 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_UC_CHANNEL_FUNCTION 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_UC_DWELL_INTERVAL 255 // set by library:mbed-mesh-api
+#define MBED_CONF_MBED_MESH_API_WISUN_UC_FIXED_CHANNEL 65535 // set by library:mbed-mesh-api
+#define MBED_CONF_MCR20A_PROVIDE_DEFAULT 0 // set by library:mcr20a
+#define MBED_CONF_NANOSTACK_CONFIGURATION nanostack_full // set by library:nanostack
+#define MBED_CONF_NANOSTACK_HAL_CRITICAL_SECTION_USABLE_FROM_INTERRUPT \
+ 0 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_EVENT_LOOP_DISPATCH_FROM_APPLICATION \
+ 0 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_EVENT_LOOP_THREAD_STACK_SIZE 6144 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_EVENT_LOOP_USE_MBED_EVENTS 0 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_KVSTORE_PATH "/kv/" // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_HAL_USE_KVSTORE 0 // set by library:nanostack-hal
+#define MBED_CONF_NANOSTACK_LIBSERVICE_NSDYNMEM_TRACKER_ENABLED \
+ 0 // set by library:nanostack-libservice
+#define MBED_CONF_NANOSTACK_LIBSERVICE_PRESENT 1 // set by library:nanostack-libservice
+#define MBED_CONF_NSAPI_ADD_EVENT_LISTENER_RETURN_CHANGE 0 // set by library:nsapi
+#define MBED_CONF_NSAPI_DEFAULT_MESH_TYPE THREAD // set by library:nsapi
+#define MBED_CONF_NSAPI_DEFAULT_STACK LWIP // set by library:nsapi
+#define MBED_CONF_NSAPI_DEFAULT_WIFI_SECURITY NONE // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_ADDRESSES_LIMIT 10 // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_CACHE_SIZE 3 // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_RESPONSE_WAIT_TIME 10000 // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_RETRIES 1 // set by library:nsapi
+#define MBED_CONF_NSAPI_DNS_TOTAL_ATTEMPTS 10 // set by library:nsapi
+#define MBED_CONF_NSAPI_PRESENT 1 // set by library:nsapi
+#define MBED_CONF_NSAPI_SOCKET_STATS_ENABLED 0 // set by library:nsapi
+#define MBED_CONF_NSAPI_SOCKET_STATS_MAX_COUNT 10 // set by library:nsapi
+#define MBED_CONF_PLATFORM_CALLBACK_COMPARABLE 1 // set by library:platform
+#define MBED_CONF_PLATFORM_CALLBACK_NONTRIVIAL 0 // set by library:platform
+#define MBED_CONF_PLATFORM_CRASH_CAPTURE_ENABLED 0 // set by library:platform
+#define MBED_CONF_PLATFORM_CTHUNK_COUNT_MAX 8 // set by library:platform
+#define MBED_CONF_PLATFORM_DEEPSLEEP_STATS_VERBOSE 0 // set by library:platform[STM]
+#define MBED_CONF_PLATFORM_DEFAULT_SERIAL_BAUD_RATE 9600 // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_ALL_THREADS_INFO 0 // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_FILENAME_CAPTURE_ENABLED 0 // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_HIST_ENABLED 0 // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_HIST_SIZE 4 // set by library:platform
+#define MBED_CONF_PLATFORM_ERROR_REBOOT_MAX 1 // set by library:platform
+#define MBED_CONF_PLATFORM_FATAL_ERROR_AUTO_REBOOT_ENABLED 0 // set by library:platform
+#define MBED_CONF_PLATFORM_MAX_ERROR_FILENAME_LEN 16 // set by library:platform
+#define MBED_CONF_PLATFORM_MINIMAL_PRINTF_ENABLE_64_BIT 1 // set by library:platform
+#define MBED_CONF_PLATFORM_MINIMAL_PRINTF_ENABLE_FLOATING_POINT 0 // set by library:platform
+#define MBED_CONF_PLATFORM_MINIMAL_PRINTF_SET_FLOATING_POINT_MAX_DECIMALS \
+ 6 // set by library:platform
+#define MBED_CONF_PLATFORM_POLL_USE_LOWPOWER_TIMER 0 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_BAUD_RATE 9600 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_BUFFERED_SERIAL 0 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_CONVERT_NEWLINES 1 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_CONVERT_TTY_NEWLINES 1 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_FLUSH_AT_EXIT 1 // set by library:platform
+#define MBED_CONF_PLATFORM_STDIO_MINIMAL_CONSOLE_ONLY 0 // set by library:platform
+#define MBED_CONF_PLATFORM_USE_MPU 1 // set by library:platform
+#define MBED_CONF_PPP_ENABLED 0 // set by library:ppp
+#define MBED_CONF_PPP_ENABLE_TRACE 0 // set by library:ppp
+#define MBED_CONF_PPP_IPV4_ENABLED 1 // set by library:ppp
+#define MBED_CONF_PPP_IPV6_ENABLED 0 // set by library:ppp
+#define MBED_CONF_PPP_MBED_EVENT_QUEUE 0 // set by library:ppp
+#define MBED_CONF_PPP_THREAD_STACKSIZE 816 // set by library:ppp
+#define MBED_CONF_QUECTEL_BC95_BAUDRATE 9600 // set by library:QUECTEL_BC95
+#define MBED_CONF_QUECTEL_BC95_PROVIDE_DEFAULT 0 // set by library:QUECTEL_BC95
+#define MBED_CONF_QUECTEL_BG96_BAUDRATE 115200 // set by library:QUECTEL_BG96
+#define MBED_CONF_QUECTEL_BG96_PROVIDE_DEFAULT 0 // set by library:QUECTEL_BG96
+#define MBED_CONF_QUECTEL_EC2X_BAUDRATE 115200 // set by library:QUECTEL_EC2X
+#define MBED_CONF_QUECTEL_EC2X_PROVIDE_DEFAULT 0 // set by library:QUECTEL_EC2X
+#define MBED_CONF_QUECTEL_EC2X_START_TIMEOUT 15000 // set by library:QUECTEL_EC2X
+#define MBED_CONF_QUECTEL_M26_BAUDRATE 115200 // set by library:QUECTEL_M26
+#define MBED_CONF_QUECTEL_M26_PROVIDE_DEFAULT 0 // set by library:QUECTEL_M26
+#define MBED_CONF_QUECTEL_UG96_BAUDRATE 115200 // set by library:QUECTEL_UG96
+#define MBED_CONF_QUECTEL_UG96_PROVIDE_DEFAULT 0 // set by library:QUECTEL_UG96
+#define MBED_CONF_RM1000_AT_BAUDRATE 230400 // set by library:RM1000_AT
+#define MBED_CONF_RM1000_AT_PROVIDE_DEFAULT 0 // set by library:RM1000_AT
+#define MBED_CONF_RTOS_API_PRESENT 1 // set by library:rtos-api
+#define MBED_CONF_RTOS_ENABLE_ALL_RTX_EVENTS 0 // set by library:rtos
+#define MBED_CONF_RTOS_EVFLAGS_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_IDLE_THREAD_STACK_SIZE 512 // set by library:rtos
+#define MBED_CONF_RTOS_IDLE_THREAD_STACK_SIZE_DEBUG_EXTRA 128 // set by library:rtos[STM]
+#define MBED_CONF_RTOS_IDLE_THREAD_STACK_SIZE_TICKLESS_EXTRA 256 // set by library:rtos
+#define MBED_CONF_RTOS_MAIN_THREAD_STACK_SIZE 4096 // set by library:rtos
+#define MBED_CONF_RTOS_MSGQUEUE_DATA_SIZE 0 // set by library:rtos
+#define MBED_CONF_RTOS_MSGQUEUE_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_MUTEX_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_PRESENT 1 // set by library:rtos
+#define MBED_CONF_RTOS_SEMAPHORE_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_THREAD_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_THREAD_STACK_SIZE 4096 // set by library:rtos
+#define MBED_CONF_RTOS_THREAD_USER_STACK_SIZE 0 // set by library:rtos
+#define MBED_CONF_RTOS_TIMER_NUM 0 // set by library:rtos
+#define MBED_CONF_RTOS_TIMER_THREAD_STACK_SIZE 768 // set by library:rtos
+#define MBED_CONF_S2LP_PROVIDE_DEFAULT 0 // set by library:s2lp
+#define MBED_CONF_SARA4_PPP_BAUDRATE 115200 // set by library:SARA4_PPP
+#define MBED_CONF_SARA4_PPP_PROVIDE_DEFAULT 0 // set by library:SARA4_PPP
+#define MBED_CONF_STM32_EMAC_ETH_PHY_ADDRESS 0 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_AUTONEGOTIATION \
+ ETH_AUTONEGOTIATION_ENABLE // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_DUPLEXMODE ETH_MODE_FULLDUPLEX // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_DUPLEX_STATUS 0x0010 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_MEDIA_INTERFACE \
+ ETH_MEDIA_INTERFACE_RMII // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_RESET_DELAY 500 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_SPEED ETH_SPEED_100M // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_SPEED_STATUS 0x0004 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_PHY_STATUS_REGISTER 31 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_RXBUFNB 4 // set by library:stm32-emac
+#define MBED_CONF_STM32_EMAC_ETH_TXBUFNB 10 // set by library:stm32-emac[STM32H7]
+#define MBED_CONF_STM32_EMAC_THREAD_STACKSIZE 1024 // set by library:stm32-emac
+#define MBED_CONF_STORAGE_DEFAULT_KV kv // set by library:storage
+#define MBED_CONF_STORAGE_FILESYSTEM_BLOCKDEVICE default // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_EXTERNAL_BASE_ADDRESS 0 // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_EXTERNAL_SIZE 0 // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_FILESYSTEM default // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_FOLDER_PATH kvstore // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_INTERNAL_BASE_ADDRESS 0 // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_MOUNT_POINT kv // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_BLOCKDEVICE \
+ default // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_EXTERNAL_BASE_ADDRESS \
+ 0 // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_EXTERNAL_SIZE \
+ 0 // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_FILESYSTEM \
+ default // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_FOLDER_PATH \
+ kvstore // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_NO_RBP_MOUNT_POINT \
+ kv // set by library:storage_filesystem_no_rbp
+#define MBED_CONF_STORAGE_FILESYSTEM_RBP_INTERNAL_SIZE 0 // set by library:storage_filesystem
+#define MBED_CONF_STORAGE_STORAGE_TYPE TDB_INTERNAL // set by library:storage[NUCLEO_H743ZI2]
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_BLOCKDEVICE default // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_EXTERNAL_BASE_ADDRESS \
+ 0 // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_EXTERNAL_SIZE 0 // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_INTERNAL_BASE_ADDRESS \
+ 0 // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_NO_RBP_BLOCKDEVICE \
+ default // set by library:storage_tdb_external_no_rbp
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_NO_RBP_EXTERNAL_BASE_ADDRESS \
+ 0 // set by library:storage_tdb_external_no_rbp
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_NO_RBP_EXTERNAL_SIZE \
+ 0 // set by library:storage_tdb_external_no_rbp
+#define MBED_CONF_STORAGE_TDB_EXTERNAL_RBP_INTERNAL_SIZE 0 // set by library:storage_tdb_external
+#define MBED_CONF_STORAGE_TDB_INTERNAL_INTERNAL_BASE_ADDRESS \
+ 0 // set by library:storage_tdb_internal
+#define MBED_CONF_STORAGE_TDB_INTERNAL_INTERNAL_SIZE 0 // set by library:storage_tdb_internal
+#define MBED_CONF_TARGET_BOOT_STACK_SIZE 0x400 // set by library:rtos[*]
+#define MBED_CONF_TARGET_CONSOLE_UART 1 // set by target:Target
+#define MBED_CONF_TARGET_CUSTOM_TICKERS 1 // set by target:Target
+#define MBED_CONF_TARGET_DEEP_SLEEP_LATENCY 4 // set by target:MCU_STM32
+#define MBED_CONF_TARGET_DEFAULT_ADC_VREF NAN // set by target:Target
+#define MBED_CONF_TARGET_GPIO_RESET_AT_INIT 0 // set by target:MCU_STM32
+#define MBED_CONF_TARGET_I2C_TIMING_VALUE_ALGO 0 // set by target:MCU_STM32H7
+#define MBED_CONF_TARGET_INIT_US_TICKER_AT_BOOT 1 // set by target:MCU_STM32
+#define MBED_CONF_TARGET_INTERNAL_FLASH_UNIFORM_SECTORS 1 // set by target:Target
+#define MBED_CONF_TARGET_LPTICKER_LPTIM 1 // set by target:MCU_STM32H7
+#define MBED_CONF_TARGET_LPTICKER_LPTIM_CLOCK 1 // set by target:MCU_STM32
+// Bitwise OR of the USE_LPUART_CLK_* flags selected for this target.
+// The expression is parenthesized so the macro expands as a single operand:
+// `&` binds tighter than `|`, so without the parentheses
+// `MBED_CONF_TARGET_LPUART_CLOCK_SOURCE & X` would parse as
+// `LSE | PCLK1 | (PCLK3 & X)` instead of masking the whole value.
+#define MBED_CONF_TARGET_LPUART_CLOCK_SOURCE \
+ (USE_LPUART_CLK_LSE | USE_LPUART_CLK_PCLK1 | USE_LPUART_CLK_PCLK3) // set by target:MCU_STM32
+#define MBED_CONF_TARGET_LSE_AVAILABLE 1 // set by target:MCU_STM32
+#define MBED_CONF_TARGET_LSE_DRIVE_LOAD_LEVEL RCC_LSEDRIVE_LOW // set by target:MCU_STM32H7
+#define MBED_CONF_TARGET_MPU_ROM_END 0x0fffffff // set by target:Target
+#define MBED_CONF_TARGET_NETWORK_DEFAULT_INTERFACE_TYPE ETHERNET // set by target:NUCLEO_H743ZI2
+#define MBED_CONF_TARGET_RTC_CLOCK_SOURCE USE_RTC_CLK_LSE_OR_LSI // set by target:MCU_STM32
+#define MBED_CONF_TARGET_SYSTEM_POWER_SUPPLY PWR_LDO_SUPPLY // set by target:MCU_STM32H743xI
+#define MBED_CONF_TARGET_TICKLESS_FROM_US_TICKER 0 // set by target:Target
+#define MBED_CONF_TARGET_XIP_ENABLE 0 // set by target:Target
+#define MBED_CONF_TELIT_HE910_BAUDRATE 115200 // set by library:TELIT_HE910
+#define MBED_CONF_TELIT_HE910_PROVIDE_DEFAULT 0 // set by library:TELIT_HE910
+#define MBED_CONF_TELIT_ME310_BAUDRATE 115200 // set by library:TELIT_ME310
+#define MBED_CONF_TELIT_ME310_PROVIDE_DEFAULT 0 // set by library:TELIT_ME310
+#define MBED_CONF_TELIT_ME910_BAUDRATE 115200 // set by library:TELIT_ME910
+#define MBED_CONF_TELIT_ME910_PROVIDE_DEFAULT 0 // set by library:TELIT_ME910
+#define MBED_CONF_UBLOX_AT_BAUDRATE 115200 // set by library:UBLOX_AT
+#define MBED_CONF_UBLOX_AT_PROVIDE_DEFAULT 0 // set by library:UBLOX_AT
+#define MBED_CONF_UBLOX_N2XX_BAUDRATE 9600 // set by library:UBLOX_N2XX
+#define MBED_CONF_UBLOX_N2XX_PROVIDE_DEFAULT 0 // set by library:UBLOX_N2XX
+#define MBED_CONF_UBLOX_PPP_BAUDRATE 115200 // set by library:UBLOX_PPP
+#define MBED_CONF_UBLOX_PPP_PROVIDE_DEFAULT 0 // set by library:UBLOX_PPP
+#define MBED_CRC_TABLE_SIZE 16 // set by library:drivers
+#define MBED_LFS2_BLOCK_CYCLES 1024 // set by library:littlefs2
+#define MBED_LFS2_BLOCK_SIZE 512 // set by library:littlefs2
+#define MBED_LFS2_CACHE_SIZE 64 // set by library:littlefs2
+#define MBED_LFS2_ENABLE_INFO 0 // set by library:littlefs2
+#define MBED_LFS2_INTRINSICS 1 // set by library:littlefs2
+#define MBED_LFS2_LOOKAHEAD_SIZE 64 // set by library:littlefs2
+#define MBED_LFS_BLOCK_SIZE 512 // set by library:littlefs
+#define MBED_LFS_ENABLE_INFO 0 // set by library:littlefs
+#define MBED_LFS_INTRINSICS 1 // set by library:littlefs
+#define MBED_LFS_LOOKAHEAD 512 // set by library:littlefs
+#define MBED_LFS_PROG_SIZE 64 // set by library:littlefs
+#define MBED_LFS_READ_SIZE 64 // set by library:littlefs
+#define MBED_STACK_DUMP_ENABLED 0 // set by library:platform
+#define MBED_TRACE_COLOR_THEME 0 // set by library:mbed-trace
+#define MEM_ALLOC malloc // set by library:mbed-trace
+#define MEM_FREE free // set by library:mbed-trace
+#define PPP_DEBUG 0 // set by library:ppp
+#define STM32_D11_SPI_ETHERNET_PIN PB_5 // set by target:NUCLEO_H743ZI2
+// Macros
+// Unlike the configuration parameters above (annotated "set by ..."), the
+// entries below are plain macros contributed by libraries ("defined by ...").
+// Those without a replacement value are presence flags: they expand to
+// nothing and can only be tested with #ifdef / defined().
+#define MBEDTLS_CIPHER_MODE_CTR // defined by library:SecureStore
+// Non-zero when either PPP configuration switch is enabled. Both
+// MBED_CONF_PPP_ENABLED and MBED_CONF_LWIP_PPP_ENABLED are defined as 0 in
+// this header, so this evaluates to 0 with the current configuration.
+#define NSAPI_PPP_AVAILABLE \
+ (MBED_CONF_PPP_ENABLED || MBED_CONF_LWIP_PPP_ENABLED) // defined by library:ppp
+// Alias for the nanostack-libservice tracker setting (defined as 0 in this
+// header).
+#define NSDYNMEM_TRACKER_ENABLED \
+ MBED_CONF_NANOSTACK_LIBSERVICE_NSDYNMEM_TRACKER_ENABLED // defined by library:nanostack-libservice
+#define NS_USE_EXTERNAL_MBED_TLS // defined by library:nanostack
+#define UNITY_INCLUDE_CONFIG_H // defined by library:utest
+#define _RTE_ // defined by library:rtos
+
+#endif
--- /dev/null
+/* Assembler setup: unified ARM/Thumb syntax, Cortex-M7 as the target CPU,
+ * `softvfp` as the FPU selection, and Thumb instruction encoding. */
+.syntax unified
+.cpu cortex-m7
+.fpu softvfp
+.thumb
+
+/* Make the vector table and the default handler visible to other objects. */
+.global g_pfnVectors
+.global Default_Handler
+
+/* One 32-bit word per symbol, each holding that symbol's address. No
+ * .section directive precedes these words, so they are emitted into the
+ * assembler's initial section. The symbols themselves are not defined in
+ * this file; presumably they come from the linker script (TODO confirm):
+ * _sidata - source of the .data initial values
+ * _sdata / _edata - start / end of .data at its run address
+ * _sbss / _ebss - start / end of .bss */
+.word _sidata
+.word _sdata
+.word _edata
+.word _sbss
+.word _ebss
+/* Reset_Handler: weak reset entry point.
+ * Sequence: load the stack pointer from _estack, call `main`, copy words
+ * from _sidata to [_sdata, _edata), store zeros over [_sbss, _ebss), then
+ * call `_start`.
+ * Register use in the copy loop: r0 = _sdata, r1 = byte offset,
+ * r2 = current destination address, r3 = scratch / end address. */
+.section .text.Reset_Handler
+.weak Reset_Handler
+.type Reset_Handler, %function
+
+Reset_Handler:
+ ldr sp, =_estack /* initial stack pointer */
+ /* NOTE(review): this call runs before .data is copied and .bss is cleared
+ * below, and `_start` is called again at the end. Confirm that calling
+ * `main` this early is intended. */
+ bl main
+
+ /* Start the .data copy at offset 0 and enter the loop at its bounds check.
+ * Previously execution fell straight through into CopyDataInit with r0 and
+ * r1 holding whatever the preceding call left in them. */
+ movs r1, #0
+ b LoopCopyDataInit
+
+/* Copy one word from _sidata + r1 to r0 + r1, then advance the offset. */
+CopyDataInit:
+ ldr r3, =_sidata
+ ldr r3, [r3, r1]
+ str r3, [r0, r1]
+ adds r1, r1, #4
+
+/* Bounds check: keep copying while _sdata + r1 is (unsigned) below _edata. */
+LoopCopyDataInit:
+ ldr r0, =_sdata
+ ldr r3, =_edata
+ adds r2, r0, r1
+ cmp r2, r3
+ bcc CopyDataInit
+ ldr r2, =_sbss /* r2 = first address to clear */
+ b LoopFillZerobss
+
+/* Store one zero word at r2 and post-increment r2 by 4. */
+FillZerobss:
+ movs r3, #0
+ str r3, [r2], #4
+
+/* Bounds check: keep clearing while r2 is (unsigned) below _ebss. */
+LoopFillZerobss:
+ ldr r3, = _ebss
+ cmp r2, r3
+ bcc FillZerobss
+
+ /* Memory is initialized; hand over to `_start` (defined outside this
+ * file, presumably the C runtime entry point - TODO confirm). */
+ bl _start
+ bx lr
+
+.size Reset_Handler, .-Reset_Handler
+
+/* Default_Handler: branches to itself forever, so execution stays parked
+ * here once entered. The symbol is exported by the `.global Default_Handler`
+ * directive near the top of this file. Presumably it is the fallback target
+ * for interrupt handlers that have no implementation of their own; that
+ * wiring is not visible here (TODO confirm). */
+.section .text.Default_Handler,"ax",%progbits
+
+Default_Handler:
+Infinite_Loop:
+ b Infinite_Loop /* spin in place */
+ .size Default_Handler, .-Default_Handler
+ .section .isr_vector,"a",%progbits
+ .type g_pfnVectors, %object
+ .size g_pfnVectors, .-g_pfnVectors
+
+
+g_pfnVectors:
+ .word _estack
+ .word Reset_Handler
+
+ .word NMI_Handler
+ .word HardFault_Handler
+ .word MemManage_Handler
+ .word BusFault_Handler
+ .word UsageFault_Handler
+ .word 0
+ .word 0
+ .word 0
+ .word 0
+ .word SVC_Handler
+ .word DebugMon_Handler
+ .word 0
+ .word PendSV_Handler
+ .word SysTick_Handler
+
+ /* External Interrupts */
+ .word WWDG_IRQHandler /* Window WatchDog */
+ .word PVD_AVD_IRQHandler /* PVD/AVD through EXTI Line detection */
+ .word TAMP_STAMP_IRQHandler /* Tamper and TimeStamps through the EXTI line */
+ .word RTC_WKUP_IRQHandler /* RTC Wakeup through the EXTI line */
+ .word FLASH_IRQHandler /* FLASH */
+ .word RCC_IRQHandler /* RCC */
+ .word EXTI0_IRQHandler /* EXTI Line0 */
+ .word EXTI1_IRQHandler /* EXTI Line1 */
+ .word EXTI2_IRQHandler /* EXTI Line2 */
+ .word EXTI3_IRQHandler /* EXTI Line3 */
+ .word EXTI4_IRQHandler /* EXTI Line4 */
+ .word DMA1_Stream0_IRQHandler /* DMA1 Stream 0 */
+ .word DMA1_Stream1_IRQHandler /* DMA1 Stream 1 */
+ .word DMA1_Stream2_IRQHandler /* DMA1 Stream 2 */
+ .word DMA1_Stream3_IRQHandler /* DMA1 Stream 3 */
+ .word DMA1_Stream4_IRQHandler /* DMA1 Stream 4 */
+ .word DMA1_Stream5_IRQHandler /* DMA1 Stream 5 */
+ .word DMA1_Stream6_IRQHandler /* DMA1 Stream 6 */
+ .word ADC_IRQHandler /* ADC1, ADC2 and ADC3s */
+ .word FDCAN1_IT0_IRQHandler /* FDCAN1 interrupt line 0 */
+ .word FDCAN2_IT0_IRQHandler /* FDCAN2 interrupt line 0 */
+ .word FDCAN1_IT1_IRQHandler /* FDCAN1 interrupt line 1 */
+ .word FDCAN2_IT1_IRQHandler /* FDCAN2 interrupt line 1 */
+ .word EXTI9_5_IRQHandler /* External Line[9:5]s */
+ .word TIM1_BRK_IRQHandler /* TIM1 Break interrupt */
+ .word TIM1_UP_IRQHandler /* TIM1 Update interrupt */
+ .word TIM1_TRG_COM_IRQHandler /* TIM1 Trigger and Commutation interrupt */
+ .word TIM1_CC_IRQHandler /* TIM1 Capture Compare */
+ .word TIM2_IRQHandler /* TIM2 */
+ .word TIM3_IRQHandler /* TIM3 */
+ .word TIM4_IRQHandler /* TIM4 */
+ .word I2C1_EV_IRQHandler /* I2C1 Event */
+ .word I2C1_ER_IRQHandler /* I2C1 Error */
+ .word I2C2_EV_IRQHandler /* I2C2 Event */
+ .word I2C2_ER_IRQHandler /* I2C2 Error */
+ .word SPI1_IRQHandler /* SPI1 */
+ .word SPI2_IRQHandler /* SPI2 */
+ .word USART1_IRQHandler /* USART1 */
+ .word USART2_IRQHandler /* USART2 */
+ .word USART3_IRQHandler /* USART3 */
+ .word EXTI15_10_IRQHandler /* External Line[15:10]s */
+ .word RTC_Alarm_IRQHandler /* RTC Alarm (A and B) through EXTI Line */
+ .word 0 /* Reserved */
+ .word TIM8_BRK_TIM12_IRQHandler /* TIM8 Break and TIM12 */
+ .word TIM8_UP_TIM13_IRQHandler /* TIM8 Update and TIM13 */
+ .word TIM8_TRG_COM_TIM14_IRQHandler /* TIM8 Trigger and Commutation and TIM14 */
+ .word TIM8_CC_IRQHandler /* TIM8 Capture Compare */
+ .word DMA1_Stream7_IRQHandler /* DMA1 Stream7 */
+ .word FMC_IRQHandler /* FMC */
+ .word SDMMC1_IRQHandler /* SDMMC1 */
+ .word TIM5_IRQHandler /* TIM5 */
+ .word SPI3_IRQHandler /* SPI3 */
+ .word UART4_IRQHandler /* UART4 */
+ .word UART5_IRQHandler /* UART5 */
+ .word TIM6_DAC_IRQHandler /* TIM6 and DAC1&2 underrun errors */
+ .word TIM7_IRQHandler /* TIM7 */
+ .word DMA2_Stream0_IRQHandler /* DMA2 Stream 0 */
+ .word DMA2_Stream1_IRQHandler /* DMA2 Stream 1 */
+ .word DMA2_Stream2_IRQHandler /* DMA2 Stream 2 */
+ .word DMA2_Stream3_IRQHandler /* DMA2 Stream 3 */
+ .word DMA2_Stream4_IRQHandler /* DMA2 Stream 4 */
+ .word ETH_IRQHandler /* Ethernet */
+ .word ETH_WKUP_IRQHandler /* Ethernet Wakeup through EXTI line */
+ .word FDCAN_CAL_IRQHandler /* FDCAN calibration unit interrupt*/
+ .word 0 /* Reserved */
+ .word 0 /* Reserved */
+ .word 0 /* Reserved */
+ .word 0 /* Reserved */
+ .word DMA2_Stream5_IRQHandler /* DMA2 Stream 5 */
+ .word DMA2_Stream6_IRQHandler /* DMA2 Stream 6 */
+ .word DMA2_Stream7_IRQHandler /* DMA2 Stream 7 */
+ .word USART6_IRQHandler /* USART6 */
+ .word I2C3_EV_IRQHandler /* I2C3 event */
+ .word I2C3_ER_IRQHandler /* I2C3 error */
+ .word OTG_HS_EP1_OUT_IRQHandler /* USB OTG HS End Point 1 Out */
+ .word OTG_HS_EP1_IN_IRQHandler /* USB OTG HS End Point 1 In */
+ .word OTG_HS_WKUP_IRQHandler /* USB OTG HS Wakeup through EXTI */
+ .word OTG_HS_IRQHandler /* USB OTG HS */
+ .word DCMI_IRQHandler /* DCMI */
+ .word 0 /* Reserved */
+ .word RNG_IRQHandler /* Rng */
+ .word FPU_IRQHandler /* FPU */
+ .word UART7_IRQHandler /* UART7 */
+ .word UART8_IRQHandler /* UART8 */
+ .word SPI4_IRQHandler /* SPI4 */
+ .word SPI5_IRQHandler /* SPI5 */
+ .word SPI6_IRQHandler /* SPI6 */
+ .word SAI1_IRQHandler /* SAI1 */
+ .word LTDC_IRQHandler /* LTDC */
+ .word LTDC_ER_IRQHandler /* LTDC error */
+ .word DMA2D_IRQHandler /* DMA2D */
+ .word SAI2_IRQHandler /* SAI2 */
+ .word QUADSPI_IRQHandler /* QUADSPI */
+ .word LPTIM1_IRQHandler /* LPTIM1 */
+ .word CEC_IRQHandler /* HDMI_CEC */
+ .word I2C4_EV_IRQHandler /* I2C4 Event */
+ .word I2C4_ER_IRQHandler /* I2C4 Error */
+ .word SPDIF_RX_IRQHandler /* SPDIF_RX */
+ .word OTG_FS_EP1_OUT_IRQHandler /* USB OTG FS End Point 1 Out */
+ .word OTG_FS_EP1_IN_IRQHandler /* USB OTG FS End Point 1 In */
+ .word OTG_FS_WKUP_IRQHandler /* USB OTG FS Wakeup through EXTI */
+ .word OTG_FS_IRQHandler /* USB OTG FS */
+ .word DMAMUX1_OVR_IRQHandler /* DMAMUX1 Overrun interrupt */
+ .word HRTIM1_Master_IRQHandler /* HRTIM Master Timer global Interrupt */
+ .word HRTIM1_TIMA_IRQHandler /* HRTIM Timer A global Interrupt */
+ .word HRTIM1_TIMB_IRQHandler /* HRTIM Timer B global Interrupt */
+ .word HRTIM1_TIMC_IRQHandler /* HRTIM Timer C global Interrupt */
+ .word HRTIM1_TIMD_IRQHandler /* HRTIM Timer D global Interrupt */
+ .word HRTIM1_TIME_IRQHandler /* HRTIM Timer E global Interrupt */
+ .word HRTIM1_FLT_IRQHandler /* HRTIM Fault global Interrupt */
+ .word DFSDM1_FLT0_IRQHandler /* DFSDM Filter0 Interrupt */
+ .word DFSDM1_FLT1_IRQHandler /* DFSDM Filter1 Interrupt */
+ .word DFSDM1_FLT2_IRQHandler /* DFSDM Filter2 Interrupt */
+ .word DFSDM1_FLT3_IRQHandler /* DFSDM Filter3 Interrupt */
+ .word SAI3_IRQHandler /* SAI3 global Interrupt */
+ .word SWPMI1_IRQHandler /* Serial Wire Interface 1 global interrupt */
+ .word TIM15_IRQHandler /* TIM15 global Interrupt */
+ .word TIM16_IRQHandler /* TIM16 global Interrupt */
+ .word TIM17_IRQHandler /* TIM17 global Interrupt */
+ .word MDIOS_WKUP_IRQHandler /* MDIOS Wakeup Interrupt */
+ .word MDIOS_IRQHandler /* MDIOS global Interrupt */
+ .word JPEG_IRQHandler /* JPEG global Interrupt */
+ .word MDMA_IRQHandler /* MDMA global Interrupt */
+ .word 0 /* Reserved */
+ .word SDMMC2_IRQHandler /* SDMMC2 global Interrupt */
+ .word HSEM1_IRQHandler /* HSEM1 global Interrupt */
+ .word 0 /* Reserved */
+ .word ADC3_IRQHandler /* ADC3 global Interrupt */
+ .word DMAMUX2_OVR_IRQHandler /* DMAMUX Overrun interrupt */
+ .word BDMA_Channel0_IRQHandler /* BDMA Channel 0 global Interrupt */
+ .word BDMA_Channel1_IRQHandler /* BDMA Channel 1 global Interrupt */
+ .word BDMA_Channel2_IRQHandler /* BDMA Channel 2 global Interrupt */
+ .word BDMA_Channel3_IRQHandler /* BDMA Channel 3 global Interrupt */
+ .word BDMA_Channel4_IRQHandler /* BDMA Channel 4 global Interrupt */
+ .word BDMA_Channel5_IRQHandler /* BDMA Channel 5 global Interrupt */
+ .word BDMA_Channel6_IRQHandler /* BDMA Channel 6 global Interrupt */
+ .word BDMA_Channel7_IRQHandler /* BDMA Channel 7 global Interrupt */
+ .word COMP1_IRQHandler /* COMP1 global Interrupt */
+ .word LPTIM2_IRQHandler /* LP TIM2 global interrupt */
+ .word LPTIM3_IRQHandler /* LP TIM3 global interrupt */
+ .word LPTIM4_IRQHandler /* LP TIM4 global interrupt */
+ .word LPTIM5_IRQHandler /* LP TIM5 global interrupt */
+ .word LPUART1_IRQHandler /* LP UART1 interrupt */
+ .word 0 /* Reserved */
+ .word CRS_IRQHandler /* Clock Recovery Global Interrupt */
+ .word ECC_IRQHandler /* ECC diagnostic Global Interrupt */
+ .word SAI4_IRQHandler /* SAI4 global interrupt */
+ .word 0 /* Reserved */
+ .word 0 /* Reserved */
+ .word WAKEUP_PIN_IRQHandler /* Interrupt for all 6 wake-up pins */
+
+ .weak NMI_Handler /* weak symbol: a handler with this name defined elsewhere takes precedence at link time */
+ .thumb_set NMI_Handler,Default_Handler /* otherwise alias it to Default_Handler as a Thumb function; every handler below repeats this pair */
+
+ .weak HardFault_Handler
+ .thumb_set HardFault_Handler,Default_Handler
+
+ .weak MemManage_Handler
+ .thumb_set MemManage_Handler,Default_Handler
+
+ .weak BusFault_Handler
+ .thumb_set BusFault_Handler,Default_Handler
+
+ .weak UsageFault_Handler
+ .thumb_set UsageFault_Handler,Default_Handler
+
+ .weak SVC_Handler
+ .thumb_set SVC_Handler,Default_Handler
+
+ .weak DebugMon_Handler
+ .thumb_set DebugMon_Handler,Default_Handler
+
+ .weak PendSV_Handler
+ .thumb_set PendSV_Handler,Default_Handler
+
+ .weak SysTick_Handler
+ .thumb_set SysTick_Handler,Default_Handler
+
+ .weak WWDG_IRQHandler
+ .thumb_set WWDG_IRQHandler,Default_Handler
+
+ .weak PVD_AVD_IRQHandler
+ .thumb_set PVD_AVD_IRQHandler,Default_Handler
+
+ .weak TAMP_STAMP_IRQHandler
+ .thumb_set TAMP_STAMP_IRQHandler,Default_Handler
+
+ .weak RTC_WKUP_IRQHandler
+ .thumb_set RTC_WKUP_IRQHandler,Default_Handler
+
+ .weak FLASH_IRQHandler
+ .thumb_set FLASH_IRQHandler,Default_Handler
+
+ .weak RCC_IRQHandler
+ .thumb_set RCC_IRQHandler,Default_Handler
+
+ .weak EXTI0_IRQHandler
+ .thumb_set EXTI0_IRQHandler,Default_Handler
+
+ .weak EXTI1_IRQHandler
+ .thumb_set EXTI1_IRQHandler,Default_Handler
+
+ .weak EXTI2_IRQHandler
+ .thumb_set EXTI2_IRQHandler,Default_Handler
+
+ .weak EXTI3_IRQHandler
+ .thumb_set EXTI3_IRQHandler,Default_Handler
+
+ .weak EXTI4_IRQHandler
+ .thumb_set EXTI4_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream0_IRQHandler
+ .thumb_set DMA1_Stream0_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream1_IRQHandler
+ .thumb_set DMA1_Stream1_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream2_IRQHandler
+ .thumb_set DMA1_Stream2_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream3_IRQHandler
+ .thumb_set DMA1_Stream3_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream4_IRQHandler
+ .thumb_set DMA1_Stream4_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream5_IRQHandler
+ .thumb_set DMA1_Stream5_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream6_IRQHandler
+ .thumb_set DMA1_Stream6_IRQHandler,Default_Handler
+
+ .weak ADC_IRQHandler
+ .thumb_set ADC_IRQHandler,Default_Handler
+
+ .weak FDCAN1_IT0_IRQHandler
+ .thumb_set FDCAN1_IT0_IRQHandler,Default_Handler
+
+ .weak FDCAN2_IT0_IRQHandler
+ .thumb_set FDCAN2_IT0_IRQHandler,Default_Handler
+
+ .weak FDCAN1_IT1_IRQHandler
+ .thumb_set FDCAN1_IT1_IRQHandler,Default_Handler
+
+ .weak FDCAN2_IT1_IRQHandler
+ .thumb_set FDCAN2_IT1_IRQHandler,Default_Handler
+
+ .weak EXTI9_5_IRQHandler
+ .thumb_set EXTI9_5_IRQHandler,Default_Handler
+
+ .weak TIM1_BRK_IRQHandler
+ .thumb_set TIM1_BRK_IRQHandler,Default_Handler
+
+ .weak TIM1_UP_IRQHandler
+ .thumb_set TIM1_UP_IRQHandler,Default_Handler
+
+ .weak TIM1_TRG_COM_IRQHandler
+ .thumb_set TIM1_TRG_COM_IRQHandler,Default_Handler
+
+ .weak TIM1_CC_IRQHandler
+ .thumb_set TIM1_CC_IRQHandler,Default_Handler
+
+ .weak TIM2_IRQHandler
+ .thumb_set TIM2_IRQHandler,Default_Handler
+
+ .weak TIM3_IRQHandler
+ .thumb_set TIM3_IRQHandler,Default_Handler
+
+ .weak TIM4_IRQHandler
+ .thumb_set TIM4_IRQHandler,Default_Handler
+
+ .weak I2C1_EV_IRQHandler
+ .thumb_set I2C1_EV_IRQHandler,Default_Handler
+
+ .weak I2C1_ER_IRQHandler
+ .thumb_set I2C1_ER_IRQHandler,Default_Handler
+
+ .weak I2C2_EV_IRQHandler
+ .thumb_set I2C2_EV_IRQHandler,Default_Handler
+
+ .weak I2C2_ER_IRQHandler
+ .thumb_set I2C2_ER_IRQHandler,Default_Handler
+
+ .weak SPI1_IRQHandler
+ .thumb_set SPI1_IRQHandler,Default_Handler
+
+ .weak SPI2_IRQHandler
+ .thumb_set SPI2_IRQHandler,Default_Handler
+
+ .weak USART1_IRQHandler
+ .thumb_set USART1_IRQHandler,Default_Handler
+
+ .weak USART2_IRQHandler
+ .thumb_set USART2_IRQHandler,Default_Handler
+
+ .weak USART3_IRQHandler
+ .thumb_set USART3_IRQHandler,Default_Handler
+
+ .weak EXTI15_10_IRQHandler
+ .thumb_set EXTI15_10_IRQHandler,Default_Handler
+
+ .weak RTC_Alarm_IRQHandler
+ .thumb_set RTC_Alarm_IRQHandler,Default_Handler
+
+ .weak TIM8_BRK_TIM12_IRQHandler
+ .thumb_set TIM8_BRK_TIM12_IRQHandler,Default_Handler
+
+ .weak TIM8_UP_TIM13_IRQHandler
+ .thumb_set TIM8_UP_TIM13_IRQHandler,Default_Handler
+
+ .weak TIM8_TRG_COM_TIM14_IRQHandler
+ .thumb_set TIM8_TRG_COM_TIM14_IRQHandler,Default_Handler
+
+ .weak TIM8_CC_IRQHandler
+ .thumb_set TIM8_CC_IRQHandler,Default_Handler
+
+ .weak DMA1_Stream7_IRQHandler
+ .thumb_set DMA1_Stream7_IRQHandler,Default_Handler
+
+ .weak FMC_IRQHandler
+ .thumb_set FMC_IRQHandler,Default_Handler
+
+ .weak SDMMC1_IRQHandler
+ .thumb_set SDMMC1_IRQHandler,Default_Handler
+
+ .weak TIM5_IRQHandler
+ .thumb_set TIM5_IRQHandler,Default_Handler
+
+ .weak SPI3_IRQHandler
+ .thumb_set SPI3_IRQHandler,Default_Handler
+
+ .weak UART4_IRQHandler
+ .thumb_set UART4_IRQHandler,Default_Handler
+
+ .weak UART5_IRQHandler
+ .thumb_set UART5_IRQHandler,Default_Handler
+
+ .weak TIM6_DAC_IRQHandler
+ .thumb_set TIM6_DAC_IRQHandler,Default_Handler
+
+ .weak TIM7_IRQHandler
+ .thumb_set TIM7_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream0_IRQHandler
+ .thumb_set DMA2_Stream0_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream1_IRQHandler
+ .thumb_set DMA2_Stream1_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream2_IRQHandler
+ .thumb_set DMA2_Stream2_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream3_IRQHandler
+ .thumb_set DMA2_Stream3_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream4_IRQHandler
+ .thumb_set DMA2_Stream4_IRQHandler,Default_Handler
+
+ .weak ETH_IRQHandler
+ .thumb_set ETH_IRQHandler,Default_Handler
+
+ .weak ETH_WKUP_IRQHandler
+ .thumb_set ETH_WKUP_IRQHandler,Default_Handler
+
+ .weak FDCAN_CAL_IRQHandler
+ .thumb_set FDCAN_CAL_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream5_IRQHandler
+ .thumb_set DMA2_Stream5_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream6_IRQHandler
+ .thumb_set DMA2_Stream6_IRQHandler,Default_Handler
+
+ .weak DMA2_Stream7_IRQHandler
+ .thumb_set DMA2_Stream7_IRQHandler,Default_Handler
+
+ .weak USART6_IRQHandler
+ .thumb_set USART6_IRQHandler,Default_Handler
+
+ .weak I2C3_EV_IRQHandler
+ .thumb_set I2C3_EV_IRQHandler,Default_Handler
+
+ .weak I2C3_ER_IRQHandler
+ .thumb_set I2C3_ER_IRQHandler,Default_Handler
+
+ .weak OTG_HS_EP1_OUT_IRQHandler
+ .thumb_set OTG_HS_EP1_OUT_IRQHandler,Default_Handler
+
+ .weak OTG_HS_EP1_IN_IRQHandler
+ .thumb_set OTG_HS_EP1_IN_IRQHandler,Default_Handler
+
+ .weak OTG_HS_WKUP_IRQHandler
+ .thumb_set OTG_HS_WKUP_IRQHandler,Default_Handler
+
+ .weak OTG_HS_IRQHandler
+ .thumb_set OTG_HS_IRQHandler,Default_Handler
+
+ .weak DCMI_IRQHandler
+ .thumb_set DCMI_IRQHandler,Default_Handler
+
+ .weak RNG_IRQHandler
+ .thumb_set RNG_IRQHandler,Default_Handler
+
+ .weak FPU_IRQHandler
+ .thumb_set FPU_IRQHandler,Default_Handler
+
+ .weak UART7_IRQHandler
+ .thumb_set UART7_IRQHandler,Default_Handler
+
+ .weak UART8_IRQHandler
+ .thumb_set UART8_IRQHandler,Default_Handler
+
+ .weak SPI4_IRQHandler
+ .thumb_set SPI4_IRQHandler,Default_Handler
+
+ .weak SPI5_IRQHandler
+ .thumb_set SPI5_IRQHandler,Default_Handler
+
+ .weak SPI6_IRQHandler
+ .thumb_set SPI6_IRQHandler,Default_Handler
+
+ .weak SAI1_IRQHandler
+ .thumb_set SAI1_IRQHandler,Default_Handler
+
+ .weak LTDC_IRQHandler
+ .thumb_set LTDC_IRQHandler,Default_Handler
+
+ .weak LTDC_ER_IRQHandler
+ .thumb_set LTDC_ER_IRQHandler,Default_Handler
+
+ .weak DMA2D_IRQHandler
+ .thumb_set DMA2D_IRQHandler,Default_Handler
+
+ .weak SAI2_IRQHandler
+ .thumb_set SAI2_IRQHandler,Default_Handler
+
+ .weak QUADSPI_IRQHandler
+ .thumb_set QUADSPI_IRQHandler,Default_Handler
+
+ .weak LPTIM1_IRQHandler
+ .thumb_set LPTIM1_IRQHandler,Default_Handler
+
+ .weak CEC_IRQHandler
+ .thumb_set CEC_IRQHandler,Default_Handler
+
+ .weak I2C4_EV_IRQHandler
+ .thumb_set I2C4_EV_IRQHandler,Default_Handler
+
+ .weak I2C4_ER_IRQHandler
+ .thumb_set I2C4_ER_IRQHandler,Default_Handler
+
+ .weak SPDIF_RX_IRQHandler
+ .thumb_set SPDIF_RX_IRQHandler,Default_Handler
+
+ .weak OTG_FS_EP1_OUT_IRQHandler
+ .thumb_set OTG_FS_EP1_OUT_IRQHandler,Default_Handler
+
+ .weak OTG_FS_EP1_IN_IRQHandler
+ .thumb_set OTG_FS_EP1_IN_IRQHandler,Default_Handler
+
+ .weak OTG_FS_WKUP_IRQHandler
+ .thumb_set OTG_FS_WKUP_IRQHandler,Default_Handler
+
+ .weak OTG_FS_IRQHandler
+ .thumb_set OTG_FS_IRQHandler,Default_Handler
+
+ .weak DMAMUX1_OVR_IRQHandler
+ .thumb_set DMAMUX1_OVR_IRQHandler,Default_Handler
+
+ .weak HRTIM1_Master_IRQHandler
+ .thumb_set HRTIM1_Master_IRQHandler,Default_Handler
+
+ .weak HRTIM1_TIMA_IRQHandler
+ .thumb_set HRTIM1_TIMA_IRQHandler,Default_Handler
+
+ .weak HRTIM1_TIMB_IRQHandler
+ .thumb_set HRTIM1_TIMB_IRQHandler,Default_Handler
+
+ .weak HRTIM1_TIMC_IRQHandler
+ .thumb_set HRTIM1_TIMC_IRQHandler,Default_Handler
+
+ .weak HRTIM1_TIMD_IRQHandler
+ .thumb_set HRTIM1_TIMD_IRQHandler,Default_Handler
+
+ .weak HRTIM1_TIME_IRQHandler
+ .thumb_set HRTIM1_TIME_IRQHandler,Default_Handler
+
+ .weak HRTIM1_FLT_IRQHandler
+ .thumb_set HRTIM1_FLT_IRQHandler,Default_Handler
+
+ .weak DFSDM1_FLT0_IRQHandler
+ .thumb_set DFSDM1_FLT0_IRQHandler,Default_Handler
+
+ .weak DFSDM1_FLT1_IRQHandler
+ .thumb_set DFSDM1_FLT1_IRQHandler,Default_Handler
+
+ .weak DFSDM1_FLT2_IRQHandler
+ .thumb_set DFSDM1_FLT2_IRQHandler,Default_Handler
+
+ .weak DFSDM1_FLT3_IRQHandler
+ .thumb_set DFSDM1_FLT3_IRQHandler,Default_Handler
+
+ .weak SAI3_IRQHandler
+ .thumb_set SAI3_IRQHandler,Default_Handler
+
+ .weak SWPMI1_IRQHandler
+ .thumb_set SWPMI1_IRQHandler,Default_Handler
+
+ .weak TIM15_IRQHandler
+ .thumb_set TIM15_IRQHandler,Default_Handler
+
+ .weak TIM16_IRQHandler
+ .thumb_set TIM16_IRQHandler,Default_Handler
+
+ .weak TIM17_IRQHandler
+ .thumb_set TIM17_IRQHandler,Default_Handler
+
+ .weak MDIOS_WKUP_IRQHandler
+ .thumb_set MDIOS_WKUP_IRQHandler,Default_Handler
+
+ .weak MDIOS_IRQHandler
+ .thumb_set MDIOS_IRQHandler,Default_Handler
+
+ .weak JPEG_IRQHandler
+ .thumb_set JPEG_IRQHandler,Default_Handler
+
+ .weak MDMA_IRQHandler
+ .thumb_set MDMA_IRQHandler,Default_Handler
+
+ .weak SDMMC2_IRQHandler
+ .thumb_set SDMMC2_IRQHandler,Default_Handler
+
+ .weak HSEM1_IRQHandler
+ .thumb_set HSEM1_IRQHandler,Default_Handler
+
+ .weak ADC3_IRQHandler
+ .thumb_set ADC3_IRQHandler,Default_Handler
+
+ .weak DMAMUX2_OVR_IRQHandler
+ .thumb_set DMAMUX2_OVR_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel0_IRQHandler
+ .thumb_set BDMA_Channel0_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel1_IRQHandler
+ .thumb_set BDMA_Channel1_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel2_IRQHandler
+ .thumb_set BDMA_Channel2_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel3_IRQHandler
+ .thumb_set BDMA_Channel3_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel4_IRQHandler
+ .thumb_set BDMA_Channel4_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel5_IRQHandler
+ .thumb_set BDMA_Channel5_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel6_IRQHandler
+ .thumb_set BDMA_Channel6_IRQHandler,Default_Handler
+
+ .weak BDMA_Channel7_IRQHandler
+ .thumb_set BDMA_Channel7_IRQHandler,Default_Handler
+
+ .weak COMP1_IRQHandler
+ .thumb_set COMP1_IRQHandler,Default_Handler
+
+ .weak LPTIM2_IRQHandler
+ .thumb_set LPTIM2_IRQHandler,Default_Handler
+
+ .weak LPTIM3_IRQHandler
+ .thumb_set LPTIM3_IRQHandler,Default_Handler
+
+ .weak LPTIM4_IRQHandler
+ .thumb_set LPTIM4_IRQHandler,Default_Handler
+
+ .weak LPTIM5_IRQHandler
+ .thumb_set LPTIM5_IRQHandler,Default_Handler
+
+ .weak LPUART1_IRQHandler
+ .thumb_set LPUART1_IRQHandler,Default_Handler
+
+ .weak CRS_IRQHandler
+ .thumb_set CRS_IRQHandler,Default_Handler
+
+ .weak ECC_IRQHandler
+ .thumb_set ECC_IRQHandler,Default_Handler
+
+ .weak SAI4_IRQHandler
+ .thumb_set SAI4_IRQHandler,Default_Handler
+
+ .weak WAKEUP_PIN_IRQHandler
+ .thumb_set WAKEUP_PIN_IRQHandler,Default_Handler
Name: nnfw
Summary: nnfw
-Version: 1.21.0
+Version: 1.22.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0
Source0: %{name}-%{version}.tar.gz
Source1: %{name}.manifest
-# TODO Update source number
Source1001: nnapi_test_generated.tar.gz
-#Source1002: GTEST.tar.gz
-Source1003: TENSORFLOW-2.3.0-EIGEN.tar.gz
-Source1004: GEMMLOWP.tar.gz
-Source1005: TENSORFLOW-2.8.0-RUY.tar.gz
-Source1006: CPUINFO.tar.gz
-Source1007: XNNPACK.tar.gz
-Source1008: FXDIV.tar.gz
-Source1009: PTHREADPOOL.tar.gz
-Source1010: PSIMD.tar.gz
-Source1011: FP16.tar.gz
-Source1012: OPENCL_HEADERS.tar.gz
-Source1013: FARMHASH.tar.gz
-Source1014: ABSEIL.tar.gz
-Source1015: OOURAFFT.tar.gz
-Source1016: TENSORFLOW_GPU.tar.gz
Source2001: nnfw.pc.in
Source2002: nnfw-plugin.pc.in
+Source3001: ABSEIL.tar.gz
+Source3002: CPUINFO.tar.gz
+Source3003: EGL_HEADERS.tar.gz
+Source3004: FARMHASH.tar.gz
+Source3005: FP16.tar.gz
+Source3006: FXDIV.tar.gz
+Source3007: GEMMLOWP.tar.gz
+Source3008: OOURAFFT.tar.gz
+Source3009: OPENCL_HEADERS.tar.gz
+Source3010: OPENGL_HEADERS.tar.gz
+Source3011: PSIMD.tar.gz
+Source3012: PTHREADPOOL.tar.gz
+Source3013: TENSORFLOW-2.8.0-EIGEN.tar.gz
+Source3014: TENSORFLOW-2.8.0-GEMMLOWP.tar.gz
+Source3015: TENSORFLOW-2.8.0-RUY.tar.gz
+Source3016: TENSORFLOW-2.8.0.tar.gz
+Source3017: VULKAN.tar.gz
+Source3018: XNNPACK.tar.gz
+Source3019: FLATBUFFERS-2.0.tar.gz
%{!?build_type: %define build_type Release}
%{!?npud_build: %define npud_build 1}
%endif
BuildRequires: cmake
-# Require flatbuffers-devel for onert frontend (model loading)
-BuildRequires: pkgconfig(flatbuffers)
-
-%ifarch %{arm} aarch64
-# Require python for acl-ex library build pre-process
-BuildRequires: python3
-BuildRequires: libarmcl-devel >= v21.02
-%endif
Requires(post): /sbin/ldconfig
Requires(postun): /sbin/ldconfig
%if %{test_build} == 1
BuildRequires: pkgconfig(boost)
-BuildRequires: pkgconfig(tensorflow-lite)
+BuildRequires: pkgconfig(tensorflow2-lite)
BuildRequires: hdf5-devel
BuildRequires: libaec-devel
BuildRequires: pkgconfig(zlib)
cp %{SOURCE1} .
mkdir ./externals
tar -xf %{SOURCE1001} -C ./tests/nnapi/src/
-#tar -xf %{SOURCE1002} -C ./externals
-tar -xf %{SOURCE1003} -C ./externals
-tar -xf %{SOURCE1004} -C ./externals
-tar -xf %{SOURCE1005} -C ./externals
-tar -xf %{SOURCE1006} -C ./externals
-tar -xf %{SOURCE1007} -C ./externals
-tar -xf %{SOURCE1008} -C ./externals
-tar -xf %{SOURCE1009} -C ./externals
-tar -xf %{SOURCE1010} -C ./externals
-tar -xf %{SOURCE1011} -C ./externals
-tar -xf %{SOURCE1012} -C ./externals
-tar -xf %{SOURCE1013} -C ./externals
-tar -xf %{SOURCE1014} -C ./externals
-tar -xf %{SOURCE1015} -C ./externals
-tar -xf %{SOURCE1016} -C ./externals
+tar -xf %{SOURCE3001} -C ./externals
+tar -xf %{SOURCE3002} -C ./externals
+tar -xf %{SOURCE3003} -C ./externals
+tar -xf %{SOURCE3004} -C ./externals
+tar -xf %{SOURCE3005} -C ./externals
+tar -xf %{SOURCE3006} -C ./externals
+tar -xf %{SOURCE3007} -C ./externals
+tar -xf %{SOURCE3008} -C ./externals
+tar -xf %{SOURCE3009} -C ./externals
+tar -xf %{SOURCE3010} -C ./externals
+tar -xf %{SOURCE3011} -C ./externals
+tar -xf %{SOURCE3012} -C ./externals
+tar -xf %{SOURCE3013} -C ./externals
+tar -xf %{SOURCE3014} -C ./externals
+tar -xf %{SOURCE3015} -C ./externals
+tar -xf %{SOURCE3016} -C ./externals
+tar -xf %{SOURCE3017} -C ./externals
+tar -xf %{SOURCE3018} -C ./externals
+tar -xf %{SOURCE3019} -C ./externals
%build
%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
%if %{test_build} == 1
mkdir -p %{test_install_path}/bin
+mkdir -p %{test_install_path}/nnapi-gtest
mkdir -p %{test_install_path}/unittest
-mkdir -p %{test_install_path}/unittest_standalone
mkdir -p %{test_install_path}/test
-install -m 755 build/out/bin/nnapi_test %{test_install_path}/bin
-install -m 755 build/out/bin/nnpackage_run %{test_install_path}/bin
+install -m 755 build/out/bin/onert_run %{test_install_path}/bin
install -m 755 build/out/bin/tflite_comparator %{test_install_path}/bin
install -m 755 build/out/bin/tflite_run %{test_install_path}/bin
-install -m 755 build/out/unittest/* %{test_install_path}/unittest
-install -m 755 build/out/unittest_standalone/*_test %{test_install_path}/unittest_standalone
-install -m 755 build/out/unittest_standalone/test_* %{test_install_path}/unittest_standalone
+install -m 755 build/out/nnapi-gtest/* %{test_install_path}/nnapi-gtest
+install -m 755 build/out/unittest/*_test %{test_install_path}/unittest
+install -m 755 build/out/unittest/test_* %{test_install_path}/unittest
cp -r build/out/test/* %{test_install_path}/test
-cp -r build/out/unittest_standalone/nnfw_api_gtest_models %{test_install_path}/unittest_standalone
+cp -r build/out/unittest/nnfw_api_gtest_models %{test_install_path}/unittest
# Share test script with ubuntu (ignore error if there is no list for target)
-cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{test_install_path}/unittest/.
-cp %{test_install_path}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{test_install_path}/unittest/nnapi_gtest.skip
+cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{test_install_path}/nnapi-gtest/.
+cp %{test_install_path}/nnapi-gtest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{test_install_path}/nnapi-gtest/nnapi_gtest.skip
tar -zxf test-suite.tar.gz -C %{buildroot}%{test_install_home}
%if %{coverage_build} == 1
%if %{npud_build} == 1
install -m 755 build/out/bin/npud %{buildroot}%{_bindir}
-%endif
+
+# Install the npud gtest binaries only when this is a test build
+%if %{test_build} == 1
+mkdir -p %{test_install_path}/npud-gtest
+install -m 755 build/out/npud-gtest/* %{test_install_path}/npud-gtest
+# end of test_build
+%endif
+
+# end of npud_build
+%endif
%endif
--- /dev/null
+import torch
+import torch.nn as nn
+
+
+# model: a kernel-size-1 Conv1d (1 -> 1 channel) followed by PReLU
+class net_Conv1dPReLU(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op1 = nn.Conv1d(1, 1, 1)
+ self.op2 = nn.PReLU()
+
+ def forward(self, input):
+ return self.op2(self.op1(input))
+
+
+_model_ = net_Conv1dPReLU()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 1, 5)
--- /dev/null
+import torch
+import torch.nn as nn
+
+
+# model: a kernel-size-1 Conv2d (1 -> 1 channel) followed by PReLU
+class net_Conv2dPReLU(nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.op1 = nn.Conv2d(1, 1, 1)
+ self.op2 = nn.PReLU()
+
+ def forward(self, input):
+ return self.op2(self.op1(input))
+
+
+_model_ = net_Conv2dPReLU()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 1, 5, 5)
if hasattr(module._model_, 'onnx_opset_version'):
opset_version = module._model_.onnx_opset_version()
+ onnx_model_path = output_folder + example + ".onnx"
+
torch.onnx.export(
module._model_,
module._dummy_,
- output_folder + example + ".onnx",
+ onnx_model_path,
verbose=True,
opset_version=opset_version)
print("Generate '" + example + ".onnx' - Done")
- onnx_model = onnx.load(output_folder + example + ".onnx")
+ onnx_model = onnx.load(onnx_model_path)
onnx.checker.check_model(onnx_model)
- tf_prep = onnx_tf.backend.prepare(onnx_model)
+ inferred_model = onnx.shape_inference.infer_shapes(onnx_model)
+ onnx.checker.check_model(inferred_model)
+ onnx.save(inferred_model, onnx_model_path)
+
+ tf_prep = onnx_tf.backend.prepare(inferred_model)
tf_prep.export_graph(path=output_folder + example + ".TF")
print("Generate '" + example + " TF' - Done")
--- /dev/null
+operand {
+ name: "x"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 4 }
+}
+operand {
+ name: "y"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 2 dim: 2 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ keep_num_dims: false
+ }
+ input: "x"
+ input: "y"
+ input: ""
+ output: "out"
+}
+input: "x"
+input: "y"
+output: "out"
--- /dev/null
+# To check if FullyConnected with non-const weight is replaced by BatchMatMul
+# (plus one Reshape) with replace_non_const_fc_with_batch_matmul pass
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "BATCH_MATMUL_EXIST" $(op_count BATCH_MATMUL) '=' 1
+RULE "RESHAPE_EXIST" $(op_count RESHAPE) '=' 1
+RULE "NO_FULLY_CONNECTED" $(op_count FULLY_CONNECTED) '=' 0
--- /dev/null
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 3 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ keep_num_dims: true
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
--- /dev/null
+#
+# generated with tflchef-reverse from PReLUwConv2d ONNX model
+#
+operand {
+ name: "input"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "mul_1/y"
+ type: FLOAT32
+ shape {
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.5"
+ }
+}
+operand {
+ name: "Const_2"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "0.25"
+ }
+}
+operand {
+ name: "ConvWeight"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "ConvBias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "ConvOut"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "Abs"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "Relu1"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "sub"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "mul"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "mul_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operand {
+ name: "output"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ input: "input"
+ input: "ConvWeight"
+ input: "ConvBias"
+ output: "ConvOut"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ activation: NONE
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ }
+}
+operation {
+ type: "Abs"
+ input: "ConvOut"
+ output: "Abs"
+}
+operation {
+ type: "ReLU"
+ input: "ConvOut"
+ output: "Relu1"
+}
+operation {
+ type: "Sub"
+ input: "ConvOut"
+ input: "Abs"
+ output: "sub"
+ sub_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "sub"
+ input: "Const_2"
+ output: "mul"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "mul"
+ input: "mul_1/y"
+ output: "mul_1"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "Relu1"
+ input: "mul_1"
+ output: "output"
+ add_options {
+ activation: NONE
+ }
+}
+input: "input"
+output: "output"
--- /dev/null
+# To check if the Abs/ReLU/Sub/Mul/Add sub-graph can be converted to a single PReLU
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "ABS_NOT_EXIST" $(op_count ABS) '=' 0
+RULE "ADD_NOT_EXIST" $(op_count ADD) '=' 0
+RULE "MUL_NOT_EXIST" $(op_count MUL) '=' 0
+RULE "RELU_NOT_EXIST" $(op_count RELU) '=' 0
+RULE "SUB_NOT_EXIST" $(op_count SUB) '=' 0
+RULE "PRELU_EXIST" $(op_count PRELU) '=' 1
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 1 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "1"
+ }
+}
+operand {
+ name: "filter_1"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "1"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "1"
+ }
+}
+operand {
+ name: "bias_1"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "constant"
+ arg: "1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 31 dim: 31 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+operand {
+ name: "output"
+ type: FLOAT32
+ shape { dim: 1 dim: 30 dim: 30 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ofm"
+ input: "filter_1"
+ input: "bias_1"
+ output: "output"
+}
+input: "ifm"
+output: "output"
--- /dev/null
+# To check if RemoveDuplicateConstPass removes all duplicate consts
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "FILTER_COUNT" $(const_count filter) '=' 1
+RULE "DUPLICATE_FILTER_COUNT" $(const_count filter_1) '=' 0
+RULE "BIAS_COUNT" $(const_count bias) '=' 1
+RULE "DUPLICATE_BIAS_COUNT" $(const_count bias_1) '=' 0
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 }
+}
+operand {
+ name: "fc_ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "fc_wgt"
+ type: FLOAT32
+ shape { dim: 8 dim: 16 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "fc_bias"
+ type: FLOAT32
+ shape { dim: 8 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "fc"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "fc_ifm"
+ input: "fc_wgt"
+ input: "fc_bias"
+ output: "fc"
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "fc"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check if FullyConnected whose input, weight and bias are all constant is folded
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "ADD_EXIST" $(op_count ADD) '=' 1
+RULE "NO_FC" $(op_count FULLY_CONNECTED) '=' 0
--- /dev/null
+# Tconv with asymmetric filter + BN + ReLU
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "5"
+ arg: "1"
+ arg: "2"
+ }
+}
+operand {
+ name: "FusedBatchNormV3"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ arg: "-7.80109"
+ }
+}
+operand {
+ name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "2.00834"
+ arg: "1.00344"
+ }
+}
+operand {
+ name: "Relu"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 5
+ dim: 1
+ dim: 2
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
+ input: "Hole"
+ output: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+}
+operation {
+ type: "Mul"
+ input: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3"
+ output: "Relu"
+ add_options {
+ activation: RELU
+ }
+}
+input: "Hole"
+output: "Relu"
--- /dev/null
+# To check if BatchNorm op (mul + add) is fused into the Transposed Convolution op.
+# The Add carries a fused RELU activation, so one RELU op is still expected afterwards.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "RELU_EXIST" $(op_count RELU) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 6 dim: 5 dim: 1 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 4 }
+ filler { tag: "explicit" arg: "0" arg: "3" arg: "2" arg: "1"}
+}
+operand {
+ name: "transpose"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operation {
+ type: "Transpose"
+ input: "ifm"
+ input: "perm"
+ output: "transpose"
+}
+operation {
+ type: "Abs"
+ input: "transpose"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 6 dim: 5 dim: 1 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 4 }
+ filler { tag: "explicit" arg: "0" arg: "3" arg: "2" arg: "1"}
+}
+operand {
+ name: "transpose"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operand {
+ name: "add_const"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 6 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 5 dim: 6 }
+}
+operation {
+ type: "Transpose"
+ input: "ifm"
+ input: "perm"
+ output: "transpose"
+}
+operation {
+ type: "Add"
+ input: "transpose"
+ input: "add_const"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check that the model can be quantized without QuantizeDequantizeWeights.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_UINT8" $(tensor_dtype ifm) '=' UINT8
+RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check that the model can be quantized without QuantizeDequantizeWeights.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_INT16" $(tensor_dtype ifm) '=' INT16
+RULE "CONV_INT16" $(tensor_dtype ofm) '=' INT16
+RULE "WEIGHTS_INT16" $(tensor_dtype filter) '=' INT16
+RULE "BIAS_INT64" $(tensor_dtype bias) '=' INT64
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 2 dim: 2 dim: 4 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "DepthToSpace"
+ depth_to_space_options {
+ block_size: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check fake quantization of DepthToSpace (D2S).
+# 1. ifm is float32.
+# 2. D2S is float32.
+# 3. Q/DQ is inserted at the beginning of the model (from ifm).
+# 4. Q/DQ is not inserted after D2S, because D2S does not change values of input.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_FP32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "D2S_FP32" $(tensor_dtype ofm) '=' FLOAT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 2 dim: 2 dim: 12 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "SpaceToDepth"
+ space_to_depth_options {
+ block_size: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+# To check fake quantization of SpaceToDepth (S2D).
+# 1. ifm is float32.
+# 2. S2D is float32.
+# 3. Q/DQ is inserted at the beginning of the model (from ifm).
+# 4. Q/DQ is not inserted after S2D, because S2D does not change values of input.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_FP32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "S2D_FP32" $(tensor_dtype ofm) '=' FLOAT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1
--- /dev/null
+#
+# NOTE generated by tflchef-reverse with res/TensorFlowPythonExamples/examples/LSTM_retseq
+#
+operand {
+ name: "serving_default_input_1:0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ }
+}
+operand {
+ name: "sequential/lstm/zeros"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ }
+ is_variable: true
+}
+operand {
+ name: "arith.constant"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.630624"
+ arg: "0.0173528"
+ arg: "0.386502"
+ arg: "0.274398"
+ }
+}
+operand {
+ name: "arith.constant1"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.215122"
+ arg: "0.0211586"
+ arg: "0.374135"
+ arg: "0.123864"
+ }
+}
+operand {
+ name: "arith.constant2"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.395891"
+ arg: "-0.516027"
+ arg: "0.311454"
+ arg: "0.423152"
+ }
+}
+operand {
+ name: "arith.constant3"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.107339"
+ arg: "0.408966"
+ arg: "0.0376898"
+ arg: "-0.544077"
+ }
+}
+operand {
+ name: "arith.constant4"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "0"
+ }
+}
+operand {
+ name: "arith.constant5"
+ type: FLOAT32
+ shape {
+ dim: 2
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "1"
+ }
+}
+operand {
+ name: "arith.constant6"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.229282"
+ arg: "-0.0678827"
+ arg: "0.449137"
+ arg: "0.470665"
+ arg: "-0.563606"
+ arg: "-0.290711"
+ arg: "0.343602"
+ arg: "-0.427935"
+ }
+}
+operand {
+ name: "arith.constant7"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.443107"
+ arg: "-0.504989"
+ arg: "-0.0738791"
+ arg: "-0.538787"
+ arg: "0.440037"
+ arg: "0.268466"
+ arg: "0.0149825"
+ arg: "-0.42883"
+ }
+}
+operand {
+ name: "arith.constant8"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.523419"
+ arg: "-0.131416"
+ arg: "-0.328037"
+ arg: "-0.636753"
+ arg: "-0.0726868"
+ arg: "-0.347395"
+ arg: "0.390772"
+ arg: "0.467617"
+ }
+}
+operand {
+ name: "arith.constant9"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.184187"
+ arg: "-0.636662"
+ arg: "0.363794"
+ arg: "0.428437"
+ arg: "-0.431681"
+ arg: "-0.617431"
+ arg: "0.53586"
+ arg: "0.686365"
+ }
+}
+operand {
+ name: "sequential/lstm/zeros1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ }
+ is_variable: true
+}
+operand {
+ name: "StatefulPartitionedCall:0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 2
+ }
+}
+operation {
+ type: "UnidirectionalSequenceLSTM"
+ input: "serving_default_input_1:0"
+ input: "arith.constant9"
+ input: "arith.constant8"
+ input: "arith.constant7"
+ input: "arith.constant6"
+ input: "arith.constant3"
+ input: "arith.constant2"
+ input: "arith.constant1"
+ input: "arith.constant"
+ input: ""
+ input: ""
+ input: ""
+ input: "arith.constant4"
+ input: "arith.constant5"
+ input: "arith.constant4"
+ input: "arith.constant4"
+ input: ""
+ input: ""
+ input: "sequential/lstm/zeros"
+ input: "sequential/lstm/zeros1"
+ input: ""
+ input: ""
+ input: ""
+ input: ""
+ output: "StatefulPartitionedCall:0"
+ unidirectional_sequence_lstm_options {
+ activation: TANH
+ cell_clip: 10
+ proj_clip: 0
+ time_major: false
+ asymmetric_quantize_inputs: false
+ }
+}
+input: "serving_default_input_1:0"
+output: "StatefulPartitionedCall:0"
--- /dev/null
+operand {
+ name: "serving_default_input_16:0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ }
+ is_variable: false
+}
+operand {
+ name: "sequential_15/lstm_15/zeros"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ is_variable: true
+}
+operand {
+ name: "arith.constant"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.960517"
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.182756"
+ }
+}
+operand {
+ name: "arith.constant2"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.028718"
+ }
+}
+operand {
+ name: "arith.constant3"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.207806"
+ }
+}
+operand {
+ name: "arith.constant4"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ }
+}
+operand {
+ name: "arith.constant5"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ }
+}
+operand {
+ name: "arith.constant6"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.476033"
+ }
+}
+operand {
+ name: "arith.constant7"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.152916"
+ }
+}
+operand {
+ name: "arith.constant8"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.308059"
+ }
+}
+operand {
+ name: "arith.constant9"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.329067"
+ }
+}
+operand {
+ name: "sequential_15/lstm_15/zeros1"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ }
+ is_variable: true
+}
+operand {
+ name: "StatefulPartitionedCall:0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ }
+}
+operation {
+ type: "UnidirectionalSequenceLSTM"
+ input: "serving_default_input_16:0"
+ input: "arith.constant9"
+ input: "arith.constant8"
+ input: "arith.constant7"
+ input: "arith.constant6"
+ input: "arith.constant3"
+ input: "arith.constant2"
+ input: "arith.constant1"
+ input: "arith.constant"
+ input: ""
+ input: ""
+ input: ""
+ input: "arith.constant4"
+ input: "arith.constant5"
+ input: "arith.constant4"
+ input: "arith.constant4"
+ input: ""
+ input: ""
+ input: "sequential_15/lstm_15/zeros"
+ input: "sequential_15/lstm_15/zeros1"
+ input: ""
+ input: ""
+ input: ""
+ input: ""
+ output: "StatefulPartitionedCall:0"
+ unidirectional_sequence_lstm_options {
+ activation: TANH
+ cell_clip: 10
+ proj_clip: 0
+ time_major: false
+ asymmetric_quantize_inputs: false
+ }
+}
+input: "serving_default_input_16:0"
+output: "StatefulPartitionedCall:0"
--- /dev/null
+# To check if Unroll of UnidriectionalSequenceLSTM works
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_UNIDIRSEQLSTM" $(op_count UNIDIRECTIONAL_SEQUENCE_LSTM) '=' 0
+RULE "YES_LOGISTICS" $(op_count LOGISTICS) '=' 3
+RULE "YES_MUL" $(op_count MUL) '=' 3
--- /dev/null
+operand {
+ name: "serving_default_input_48:0"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 3
+ dim: 4
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "sequential_47/lstm_46/zeros"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 5
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: true
+}
+operand {
+ name: "arith.constant"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.182069"
+ arg: "0.158518"
+ arg: "-0.249876"
+ arg: "-0.223681"
+ arg: "-0.0251322"
+ arg: "-0.234799"
+ arg: "0.0315703"
+ arg: "0.0713779"
+ arg: "-0.398819"
+ arg: "-0.331811"
+ arg: "-0.24586"
+ arg: "-0.034448"
+ arg: "-0.187116"
+ arg: "-0.224618"
+ arg: "0.280953"
+ arg: "-0.0503904"
+ arg: "0.0335912"
+ arg: "0.34419"
+ arg: "0.0784627"
+ arg: "0.246556"
+ arg: "-0.446514"
+ arg: "0.175145"
+ arg: "0.494241"
+ arg: "0.120458"
+ arg: "0.106793"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant1"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.409107"
+ arg: "-0.406787"
+ arg: "0.111563"
+ arg: "-0.194133"
+ arg: "-0.229023"
+ arg: "0.287904"
+ arg: "-0.344601"
+ arg: "0.0946776"
+ arg: "-0.198879"
+ arg: "0.532953"
+ arg: "0.105883"
+ arg: "0.113309"
+ arg: "-0.100015"
+ arg: "0.262142"
+ arg: "-0.223262"
+ arg: "-0.00894637"
+ arg: "-0.0819539"
+ arg: "0.195495"
+ arg: "-0.291116"
+ arg: "-0.0707405"
+ arg: "0.274591"
+ arg: "0.313034"
+ arg: "0.396099"
+ arg: "-0.186455"
+ arg: "0.0721643"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant2"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.0879868"
+ arg: "0.20888"
+ arg: "0.0121427"
+ arg: "-0.537515"
+ arg: "-0.20519"
+ arg: "-0.0189587"
+ arg: "0.269877"
+ arg: "-0.182624"
+ arg: "-0.0591339"
+ arg: "0.0318922"
+ arg: "-0.227111"
+ arg: "-0.149458"
+ arg: "-0.172937"
+ arg: "0.0187907"
+ arg: "0.0670664"
+ arg: "-0.121135"
+ arg: "-0.058337"
+ arg: "-0.0598793"
+ arg: "-0.362267"
+ arg: "0.0774832"
+ arg: "0.199173"
+ arg: "-0.0380472"
+ arg: "0.107854"
+ arg: "0.0658764"
+ arg: "0.0537086"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant3"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.017441"
+ arg: "0.272052"
+ arg: "-0.00516871"
+ arg: "-0.0291451"
+ arg: "0.0884765"
+ arg: "0.0531231"
+ arg: "0.0352237"
+ arg: "-0.00947183"
+ arg: "0.00681541"
+ arg: "-0.000782808"
+ arg: "0.201295"
+ arg: "0.26533"
+ arg: "-0.436603"
+ arg: "-0.0725246"
+ arg: "0.390646"
+ arg: "-0.393321"
+ arg: "-0.447548"
+ arg: "-0.021616"
+ arg: "-0.0852413"
+ arg: "0.143229"
+ arg: "0.0062271"
+ arg: "0.222503"
+ arg: "0.195852"
+ arg: "-0.112013"
+ arg: "0.322707"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant4"
+ type: FLOAT32
+ shape {
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "0"
+ arg: "0"
+ arg: "0"
+ arg: "0"
+ arg: "0"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant5"
+ type: FLOAT32
+ shape {
+ dim: 5
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "1"
+ arg: "1"
+ arg: "1"
+ arg: "1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant6"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.110842"
+ arg: "0.362487"
+ arg: "-0.193648"
+ arg: "0.0795254"
+ arg: "-0.154508"
+ arg: "0.0420029"
+ arg: "-0.320009"
+ arg: "-0.299519"
+ arg: "0.0381875"
+ arg: "-0.439949"
+ arg: "-0.290634"
+ arg: "0.0254151"
+ arg: "-0.138734"
+ arg: "0.328987"
+ arg: "0.449845"
+ arg: "0.0656276"
+ arg: "0.0410624"
+ arg: "-0.35757"
+ arg: "0.234629"
+ arg: "-0.310387"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant7"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.418478"
+ arg: "-0.197303"
+ arg: "-0.0769891"
+ arg: "-0.352671"
+ arg: "-0.27675"
+ arg: "-0.221081"
+ arg: "-0.238606"
+ arg: "-0.0518556"
+ arg: "-0.470707"
+ arg: "0.162187"
+ arg: "-0.0575043"
+ arg: "-0.194339"
+ arg: "0.0110147"
+ arg: "-0.0778302"
+ arg: "0.0032438"
+ arg: "0.305049"
+ arg: "0.353269"
+ arg: "-0.257547"
+ arg: "-0.472484"
+ arg: "-0.0296589"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant8"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "0.233865"
+ arg: "0.260391"
+ arg: "0.343597"
+ arg: "0.403272"
+ arg: "-0.0299743"
+ arg: "-0.137641"
+ arg: "0.13583"
+ arg: "0.212403"
+ arg: "0.0147645"
+ arg: "-0.382367"
+ arg: "-0.368439"
+ arg: "0.260765"
+ arg: "-0.0455869"
+ arg: "0.329342"
+ arg: "-0.216915"
+ arg: "-0.441979"
+ arg: "0.147086"
+ arg: "0.131922"
+ arg: "-0.44475"
+ arg: "0.0715657"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "arith.constant9"
+ type: FLOAT32
+ shape {
+ dim: 5
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "-0.0868829"
+ arg: "0.127576"
+ arg: "-0.48598"
+ arg: "0.32627"
+ arg: "0.360762"
+ arg: "-0.235853"
+ arg: "-0.223454"
+ arg: "0.265532"
+ arg: "-0.163921"
+ arg: "0.130234"
+ arg: "0.411861"
+ arg: "-0.0193611"
+ arg: "0.165723"
+ arg: "0.326238"
+ arg: "0.119351"
+ arg: "-0.0257632"
+ arg: "0.455063"
+ arg: "-0.0131663"
+ arg: "-0.157016"
+ arg: "0.482517"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operand {
+ name: "sequential_47/lstm_46/zeros1"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 5
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: true
+}
+operand {
+ name: "StatefulPartitionedCall:0"
+ type: FLOAT32
+ shape {
+ dim: 2
+ dim: 3
+ dim: 5
+ }
+ quant {
+ quantized_dimension: 0
+ }
+ is_variable: false
+}
+operation {
+ type: "UnidirectionalSequenceLSTM"
+ input: "serving_default_input_48:0"
+ input: "arith.constant9"
+ input: "arith.constant8"
+ input: "arith.constant7"
+ input: "arith.constant6"
+ input: "arith.constant3"
+ input: "arith.constant2"
+ input: "arith.constant1"
+ input: "arith.constant"
+ input: ""
+ input: ""
+ input: ""
+ input: "arith.constant4"
+ input: "arith.constant5"
+ input: "arith.constant4"
+ input: "arith.constant4"
+ input: ""
+ input: ""
+ input: "sequential_47/lstm_46/zeros"
+ input: "sequential_47/lstm_46/zeros1"
+ input: ""
+ input: ""
+ input: ""
+ input: ""
+ output: "StatefulPartitionedCall:0"
+ unidirectional_sequence_lstm_options {
+ activation: TANH
+ cell_clip: 10
+ proj_clip: 0
+ time_major: false
+ asymmetric_quantize_inputs: false
+ }
+}
+input: "serving_default_input_48:0"
+output: "StatefulPartitionedCall:0"
--- /dev/null
+# To check if Unroll of UnidirectionalSequenceLSTM works
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "NO_UNIDIRSEQLSTM" $(op_count UNIDIRECTIONAL_SEQUENCE_LSTM) '=' 0
+RULE "YES_FC" $(op_count FULLY_CONNECTED) '=' 5
## Prerequisite
-- Python 3.X
-- TensorFlow 1.15
+- Python 3.8
+- TensorFlow 2.8.0
+- NOTE some examples may use old versions
## Directory Layout
## HOWTO: Create a Python environment
-TBA
+Install the release Debian packages from https://github.com/Samsung/ONE/releases
+and enter the virtual environment.
+```
+source /usr/share/one/bin/venv/bin/activate
+```
+You may have to prepare the virtual environment the first time. Read
+[how-to-prepare-virtualenv.txt](https://github.com/Samsung/ONE/blob/master/compiler/one-cmds/how-to-prepare-virtualenv.txt)
+for more information.
## HOWTO: Generate a pbtxt from examples
--- /dev/null
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.GRU(2, input_shape=shape, unroll=True))
--- /dev/null
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.LSTM(2, input_shape=shape))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289766546
--- /dev/null
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.LSTM(2, input_shape=shape, return_sequences=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289768739
--- /dev/null
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.LSTM(2, input_shape=shape, unroll=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1288436802
--- /dev/null
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+grucell = keras.layers.GRUCell(2)
+model.add(keras.layers.RNN(grucell, input_shape=shape, unroll=True))
--- /dev/null
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+lstmcell = keras.layers.LSTMCell(2)
+model.add(keras.layers.RNN(lstmcell, input_shape=shape, unroll=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289820894
--- /dev/null
+# NOTE tested with TF 2.8.0
+from tensorflow import keras
+
+model = keras.Sequential()
+shape = (4, 4)
+
+model.add(keras.layers.InputLayer(input_shape=shape, batch_size=1))
+model.add(keras.layers.SimpleRNN(2, input_shape=shape, unroll=True))
+
+# NOTE refer https://github.com/Samsung/ONE/issues/9895#issuecomment-1289811569
import importlib
import argparse
+from pathlib import Path
+from tensorflow import keras
+
parser = argparse.ArgumentParser(description='Process TensorFlow Python Examples')
-parser.add_argument('--mode', metavar='MODE', choices=['pbtxt'], default='pbtxt')
parser.add_argument('examples', metavar='EXAMPLES', nargs='+')
args = parser.parse_args()
-if args.mode == 'pbtxt':
- for example in args.examples:
- print("Generate '" + example + ".pbtxt'")
+output_folder = "./output/"
+
+Path(output_folder).mkdir(parents=True, exist_ok=True)
+
+for example in args.examples:
+ print("Generate '" + example + ".pbtxt'")
+
+ tf.compat.v1.reset_default_graph()
+ # https://stackoverflow.com/questions/37808866/proper-way-to-dynamically-import-a-module-with-relative-imports
+ m = importlib.import_module("examples." + example)
+
+ with open(output_folder + example + ".pbtxt", "w") as f:
+ f.write(str(tf.compat.v1.get_default_graph().as_graph_def(add_shapes=True)))
+
+ print("Generate '" + example + ".pbtxt' - Done")
- tf.compat.v1.reset_default_graph()
- # https://stackoverflow.com/questions/37808866/proper-way-to-dynamically-import-a-module-with-relative-imports
- importlib.import_module("examples." + example)
+ # Does the example expose a keras.Sequential as 'model'? If so, export it as well.
+ if hasattr(m, 'model') and isinstance(m.model, keras.Sequential):
+ print("Generate '" + example + ".h5'")
+ m.model.save(output_folder + example + ".h5")
+ print("Generate '" + example + ".h5' - Done")
- with open(example + ".pbtxt", "w") as f:
- f.write(str(tf.compat.v1.get_default_graph().as_graph_def(add_shapes=True)))
+ # tflite export for experiments
+ converter = tf.lite.TFLiteConverter.from_keras_model(m.model)
+ converter.allow_custom_ops = True
+ converter.experimental_new_converter = True
+ converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
+ converter._experimental_lower_tensor_list_ops = False
- print("Generate '" + example + ".pbtxt' - Done")
+ tflite_model = converter.convert()
+ with open(output_folder + example + ".tflite", "wb") as f:
+ f.write(tflite_model)
+ print("Generate '" + example + ".tflite' - Done")
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.21.0"
+ versionName "1.22.0"
externalNativeBuild {
ndkBuild {
+++ /dev/null
-if(NOT BUILD_ANDROID_TFLITE)
- return()
-endif(NOT BUILD_ANDROID_TFLITE)
-
-nnfw_find_package(TensorFlowLite EXACT 1.13.1 REQUIRED)
-
-if(NOT DEFINED NDK_DIR)
- message(FATAL_ERROR "NDK_DIR should be specified via environment variable")
-endif()
-message(STATUS "Found NDK: ${NDK_DIR}")
-
-#
-# Tensorflow Lite JNI library
-#
-set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/lite")
-set(TFLITE_JNI_BASE ${TENSORFLOW_LITE_BASE}/java/src/main/native)
-set(TFLITE_JNI_SRCS ${TFLITE_JNI_BASE}/exception_jni.cc
- ${TFLITE_JNI_BASE}/nativeinterpreterwrapper_jni.cc
- ${TFLITE_JNI_BASE}/tensor_jni.cc
- ${TFLITE_JNI_BASE}/tensorflow_lite_jni.cc
- ${CMAKE_CURRENT_SOURCE_DIR}/builtin_ops_jni.cc # Use nnfw's OpResolver
- )
-set(TFLITE_JNI_INCLUDES ${TENSORFLOW_LITE_BASE}/java/src/native)
-
-# TODO use tensorflow-lite static library instead of compiling all the sources again
-add_library(tensorflowlite_jni SHARED ${TFLITE_JNI_SRCS} ${TFLITE_SRCS})
-target_include_directories(tensorflowlite_jni PUBLIC ${TFLITE_JNI_INCLUDES} ${TFLITE_INCLUDES})
-target_link_libraries(tensorflowlite_jni eigen ${LIB_PTHREAD} dl)
-target_link_libraries(tensorflowlite_jni log)
-target_link_libraries(tensorflowlite_jni nnfw_lib_tflite)
-install(TARGETS tensorflowlite_jni DESTINATION lib)
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the License);
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tensorflow/lite/kernels/register.h"
-#include "tflite/ext/kernels/register.h"
-
-namespace tflite
-{
-
-std::unique_ptr<OpResolver> CreateOpResolver()
-{
- return std::unique_ptr<::nnfw::tflite::BuiltinOpResolver>(
- new ::nnfw::tflite::BuiltinOpResolver());
-}
-
-} // namespace tflite
{
cl_int buildErr = CL_SUCCESS;
auto buildInfo = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
- for (auto &pair : buildInfo)
+ for (const auto &pair : buildInfo)
{
std::cerr << pair.second << std::endl << std::endl;
}
namespace benchmark
{
-// Data class between runner(nnpackage_run and tflite_run) and libbenchmark
+// Data class between runner(onert_run and tflite_run) and libbenchmark
class Result
{
public:
}
uint32_t cur_pss = getPssSum();
- for (auto &phase : _phases)
+ for (const auto &phase : _phases)
{
auto &rss = _rss_map.at(phase);
if (rss < cur_rss)
#include <cassert>
#include <chrono>
#include <iostream>
-#include <sys/time.h>
+#include <time.h>
namespace
{
uint64_t nowMicros()
{
- struct timeval tv;
- gettimeofday(&tv, nullptr);
- return static_cast<uint64_t>(tv.tv_sec) * 1e6 + tv.tv_usec;
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return static_cast<uint64_t>(ts.tv_nsec) / 1e3 + static_cast<uint64_t>(ts.tv_sec) * 1e6;
}
void SleepForMicros(uint64_t micros)
target_link_libraries(nnfw_lib_misc_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
add_test(nnfw_lib_misc_test nnfw_lib_misc_test)
-install(TARGETS nnfw_lib_misc_test DESTINATION unittest_standalone)
+install(TARGETS nnfw_lib_misc_test DESTINATION unittest)
target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
add_test(ndarray_test ndarray_test)
-install(TARGETS ndarray_test DESTINATION unittest_standalone)
+install(TARGETS ndarray_test DESTINATION unittest)
add_subdirectory(example)
#if defined(_MSC_VER)
#include <chrono> // NOLINT(build/c++11)
#else
-#include <sys/time.h>
+#include <time.h>
#endif
namespace tflite {
#else
uint64_t NowMicros() {
- struct timeval tv;
- gettimeofday(&tv, nullptr);
- return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+ // Returns the current CLOCK_MONOTONIC reading converted to microseconds.
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ // Integer arithmetic, as in the replaced gettimeofday() version; the 1e3 / 1e6
+ // double literals would compute the result in floating point.
+ return static_cast<uint64_t>(ts.tv_sec) * 1000000 +
+        static_cast<uint64_t>(ts.tv_nsec) / 1000;
}
#endif // defined(_MSC_VER)
-nnfw_find_package(TensorFlowLite EXACT 1.13.1 QUIET)
+nnfw_find_package(TensorFlowLite EXACT 2.8.0 QUIET)
if(NOT TensorFlowLite_FOUND)
message(STATUS "Check tensorflow lite library extension build: need tensorflow lite library")
return()
add_library(nnfw_lib_tflite STATIC ${SOURCES})
set_target_properties(nnfw_lib_tflite PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(nnfw_lib_tflite PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(nnfw_lib_tflite PUBLIC tensorflow-lite)
+target_link_libraries(nnfw_lib_tflite PUBLIC tensorflow-lite-2.8.0)
target_link_libraries(nnfw_lib_tflite PUBLIC nnfw_lib_misc)
target_link_libraries(nnfw_lib_tflite PRIVATE ${LIB_PTHREAD} dl)
target_link_libraries(nnfw_lib_tflite PRIVATE nnfw_common)
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
-#define __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
-
-#include <tensorflow/lite/interpreter.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-class CopyInputInitializer
-{
-public:
- CopyInputInitializer(::tflite::Interpreter &from) : _from{from}
- {
- // DO NOTHING
- }
-
- void run(::tflite::Interpreter &interp);
-
-private:
- template <typename T> void setValue(::tflite::Interpreter &interp, int tensor_idx);
-
-private:
- ::tflite::Interpreter &_from;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_COPY_INPUT_INITIALIZER_H__
#ifndef __NNFW_TFLITE_DIFF_H__
#define __NNFW_TFLITE_DIFF_H__
-#include "tensorflow/lite/interpreter.h"
+#include "tflite/TensorView.h"
#include "misc/RandomGenerator.h"
#include "misc/tensor/Index.h"
#include "misc/tensor/Shape.h"
#include "misc/tensor/Comparator.h"
-#include "tflite/TensorView.h"
+#include <tensorflow/lite/c/c_api.h>
#include <functional>
#include <vector>
public:
/**
* @brief Run two interpreter and return the output matching
- * @param[in] pure Interpreter object of expected(with TfLite)
- * @param[in] nnapi Interpreter object of obtained(through NNAPI)
+ * @param[in] expected Interpreter object of expected
+ * @param[in] obtained Interpreter object of obtained
* @return @c true if two Interpreter results are same, otherwise @c false
*/
- bool run(::tflite::Interpreter &pure, ::tflite::Interpreter &nnapi) const;
+ bool run(TfLiteInterpreter &expected, TfLiteInterpreter &obtained) const;
/**
* @brief Compare two TensorView values and return the match result
* @param[in] expected TensorView object to read expected values
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file FeatureView.h
- * @brief This file contains FeatureView class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_FEATURE_VIEW_H__
-#define __NNFW_TFLITE_FEATURE_VIEW_H__
-
-#include "tensorflow/lite/interpreter.h"
-
-#include "tflite/InputIndex.h"
-#include "tflite/OutputIndex.h"
-
-#include "misc/feature/Shape.h"
-#include "misc/feature/Reader.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-template <typename T> class FeatureView;
-
-/**
- * @brief Class to support reading element of float type feature
- */
-template <> class FeatureView<float> : public nnfw::misc::feature::Reader<float>
-{
-public:
- /**
- * @brief Construct a new FeatureView object
- * @param[in] interp Interpreter to read from
- * @param[in] index InputIndex index of input
- */
- FeatureView(::tflite::Interpreter &interp, const InputIndex &index);
- /**
- * @brief Construct a new FeatureView object
- * @param[in] interp Interpreter to read from
- * @param[in] index OutputIndex index of output
- */
- FeatureView(::tflite::Interpreter &interp, const OutputIndex &index);
-
-public:
- /**
- * @brief Get value of element using channel, row and column index
- * @param[in] ch Channel index
- * @param[in] row Row index
- * @param[in] col Column index
- * @return Value of element
- */
- float at(uint32_t ch, uint32_t row, uint32_t col) const;
- /**
- * @brief Get reference of element using channel, row and column index
- * @param[in] ch Channel index
- * @param[in] row Row index
- * @param[in] col Column index
- * @return Reference of element
- */
- float &at(uint32_t ch, uint32_t row, uint32_t col);
-
- float at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const = 0;
-
-private:
- /**
- * @brief Get offset of element from channel, row and column index
- * @param[in] ch Channel index
- * @param[in] row Row index
- * @param[in] col Column index
- * @return Offset of element
- */
- uint32_t getElementOffset(uint32_t ch, uint32_t row, uint32_t col) const
- {
- uint32_t res = 0;
-
- // TensorFlow Lite assumes that NHWC ordering for tessor
- res += row * _shape.W * _shape.C;
- res += col * _shape.C;
- res += ch;
-
- return res;
- }
-
-private:
- nnfw::misc::feature::Shape _shape;
- float *_base;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_FEATURE_VIEW_H__
* @brief Construct a InterpreterSession object with interpreter of TfLite
* @param[in] interp The TfLite interpreter pointer
*/
- InterpreterSession(::tflite::Interpreter *interp) : _interp{interp}
+ InterpreterSession(TfLiteInterpreter *interp) : _interp{interp}
{
// DO NOTHING
}
* @brief Get TfLite interpreter pointer
* @return The TfLite interpreter
*/
- ::tflite::Interpreter *interp(void) override { return _interp; }
+ TfLiteInterpreter *interp(void) override { return _interp; }
public:
/**
*/
bool prepare(void) override
{
- _interp->UseNNAPI(false);
-
- if (kTfLiteOk != _interp->AllocateTensors())
+ if (kTfLiteOk != TfLiteInterpreterAllocateTensors(_interp))
{
return false;
}
bool run(void) override
{
// Return true if Invoke returns kTfLiteOk
- return kTfLiteOk == _interp->Invoke();
+ return kTfLiteOk == TfLiteInterpreterInvoke(_interp);
}
/**
}
private:
- ::tflite::Interpreter *const _interp;
+ TfLiteInterpreter *const _interp;
};
} // namespace tflite
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file NNAPISession.h
- * @brief This file contains NNAPISession class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_NNAPI_SESSION_H__
-#define __NNFW_TFLITE_NNAPI_SESSION_H__
-
-#include "Session.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define NNAPI interpreter session which is inherited from Session class
- */
-class NNAPISession final : public Session
-{
-public:
- /**
- * @brief Construct a NNAPISession object with interpreter of TfLite
- * @param[in] interp The TfLite interpreter pointer
- * @note Invoke BuildGraph() of NNAPI delegate from Interpreter
- */
- NNAPISession(::tflite::Interpreter *interp) : _interp{interp}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Get TfLite interpreter pointer
- * @return The TfLite interpreter
- */
- ::tflite::Interpreter *interp(void) override { return _interp; }
-
-public:
- /**
- * @brief Prepare the TfLite interpreter session
- * @return @c true if tensor preparation is successful, otherwise @c false
- */
- bool prepare(void) override
- {
- // Explicitly turn off T/F lite internal NNAPI delegation in order to use locally defined
- // NNAPI delegation.
- _interp->UseNNAPI(true);
-
- if (kTfLiteOk != _interp->AllocateTensors())
- {
- return false;
- }
-
- return true;
- }
-
- /**
- * @brief Run the Invoke function of NNAPI delegate
- * @return @c true if Invoke() is successful, otherwise @c false
- */
- bool run(void) override { return kTfLiteOk == _interp->Invoke(); }
-
- /**
- * @brief Tear down TfLite interpreter session
- * @return @c true always
- */
- bool teardown(void) override
- {
- // DO NOTHING
- return true;
- }
-
-private:
- ::tflite::Interpreter *const _interp;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_NNAPI_SESSION_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file OutputIndex.h
- * @brief This file contains OutputIndex class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_OUTPUT_INDEX_H__
-#define __NNFW_TFLITE_OUTPUT_INDEX_H__
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define OutputIndex
- */
-class OutputIndex
-{
-public:
- /**
- * @brief Construct a OutputIndex object with index value
- * @param[in] index The value of index
- */
- OutputIndex(int index) : _index(index)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Get index value as int
- * @return Index valuel as int
- */
- int asInt(void) const { return _index; }
-
-private:
- int _index;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_OUTPUT_INDEX_H__
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_TFLITE_OUTPUT_RESETTER_H__
-#define __NNFW_TFLITE_OUTPUT_RESETTER_H__
-
-#include <tensorflow/lite/interpreter.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-class OutputResetter
-{
-public:
- OutputResetter()
- {
- // DO NOTHING
- }
-
- void run(::tflite::Interpreter &interp);
-
-private:
- template <typename T> void resetValue(::tflite::Interpreter &interp, int tensor_idx);
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_OUTPUT_RESETTER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Quantization.h
- * @brief This file contains BitwiseIntToFloat union and quantization related
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_QUANTIZATION_H__
-#define __NNFW_TFLITE_QUANTIZATION_H__
-
-/**
- * @brief Union to provide bitwise conversion of integer and float
- */
-union BitwiseIntToFloat {
- int i;
- float f;
-};
-
-static const float FLOAT_NEAREST_TO_1 = BitwiseIntToFloat{0x3f7fffff}.f;
-
-#include "tensorflow/lite/context.h"
-
-/**
- * @brief Get TfLiteQuantizationParams object with default values
- * @return TfLiteQuantizationParams object
- */
-TfLiteQuantizationParams make_default_quantization(void);
-
-#endif // __NNFW_TFLITE_QUANTIZATION_H__
#include <misc/RandomGenerator.h>
-#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/c/c_api.h>
namespace nnfw
{
// DO NOTHING
}
- void run(::tflite::Interpreter &interp);
-
-private:
- template <typename T> void setValue(::tflite::Interpreter &interp, int tensor_idx);
+ void run(TfLiteInterpreter &interp);
private:
nnfw::misc::RandomGenerator &_randgen;
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file RandomTestRunner.h
- * @brief This file contains class for random input testing
- */
-
-#ifndef __NNFW_TFLITE_RANDOM_TEST_RUNNER_H__
-#define __NNFW_TFLITE_RANDOM_TEST_RUNNER_H__
-
-#include "tflite/interp/Builder.h"
-
-#include <misc/RandomGenerator.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Structure for NNAPI correctness test
- */
-struct RandomTestParam
-{
- int verbose; //!< Verbosity of debug information
- int tolerance; //!< Torlerance of value difference
- int tensor_logging = 0; //!< Save logging to a file if not 0
- std::string log_path = ""; //!< Path of log file, meaningful only when tensor_logging is 1
-};
-
-/**
- * @brief Class to define Random test runner
- */
-class RandomTestRunner
-{
-public:
- /**
- * @brief Construct a new RandomTestRunner object
- * @param[in] seed Random seed value
- * @param[in] param RandomTestParam object for test runner
- * @param[in] quantization TfLiteQuantizationParams type to represent quantization value
- */
- RandomTestRunner(uint32_t seed, const RandomTestParam ¶m)
- : _randgen{seed, 0.0f, 2.0f}, _param{param}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Run the random test runner
- * @param[in] running_count Count to run tflite interpreter with NNAPI
- * @return 0 if test succeeds, otherwise failure
- */
- int run(size_t running_count);
-
-public:
- /**
- * @brief Get RandomGenerator reference
- * @return RandomGenerator reference
- */
- nnfw::misc::RandomGenerator &generator() { return _randgen; };
-
-public:
- /**
- * @brief Compile the random test runner
- * @param[in] builder Interpreter Builder used to run
- */
- void compile(const nnfw::tflite::Builder &builder);
-
-private:
- nnfw::misc::RandomGenerator _randgen;
- const RandomTestParam _param;
- std::unique_ptr<::tflite::Interpreter> _tfl_interp;
- std::unique_ptr<::tflite::Interpreter> _nnapi;
-
-public:
- /**
- * @brief Create a RandomTestRunner object
- * @param[in] seed Random seed value
- * @return RandomGenerator object
- */
- static RandomTestRunner make(uint32_t seed);
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_RANDOM_TEST_RUNNER_H__
#ifndef __NNFW_TFLITE_SESSION_H__
#define __NNFW_TFLITE_SESSION_H__
-#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/c/c_api.h>
namespace nnfw
{
* @brief Get the Interpreter object pointer
* @return The Interpreter object pointer
*/
- virtual ::tflite::Interpreter *interp(void) = 0;
+ virtual TfLiteInterpreter *interp(void) = 0;
/**
* @brief Prepare the session
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorLogger.h
- * @brief This file contains TensorLogger class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_LOGGER_H__
-#define __NNFW_TFLITE_TENSOR_LOGGER_H__
-
-#include "misc/tensor/IndexIterator.h"
-#include "tflite/TensorView.h"
-
-#include <tensorflow/lite/interpreter.h>
-#include <tensorflow/lite/context.h>
-#include <fstream>
-#include <iomanip>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to write input and output value / shape into a file in python form
- * @note This is a utility to write input and output value / shape into a file in python form.\n
- * any python app can load this value by running the python code below:\n
- * exec(open(filename).read())\n
- * generated python code looks like the following: \n
- * tensor_shape_gen = []\n
- * tensor_value_gen = []\n\n
- * tensor_shape_gen.append("{2, 1, 2}")\n
- * tensor_value_gen.append([1, 2, 3, 4])\n\n
- * tensor_shape_gen.append("{2}")\n
- * tensor_value_gen.append([1, 2])\n\n
- * tensor_shape_gen.append("{2, 1, 2}")\n
- * tensor_value_gen.append([1, 4, 3, 8])\n
- */
-class TensorLogger
-{
-private:
- std::ofstream _outfile;
-
-public:
- /**
- * @brief Get TensorLogger instance
- * @return The TensorLogger instance
- */
- static TensorLogger &get()
- {
- static TensorLogger instance;
- return instance;
- }
-
- /**
- * @brief Save the tensor details to file from interpreter
- * @param[in] path The file path to save
- * @param[in] interp The TfLite interpreter
- */
- void save(const std::string &path, ::tflite::Interpreter &interp)
- {
- open(path);
-
- int log_index = 0;
- for (const auto id : interp.inputs())
- {
- _outfile << "# input tensors" << std::endl;
- printTensor(interp, id, log_index++);
- }
- for (const auto id : interp.outputs())
- {
- _outfile << "# output tensors" << std::endl;
- printTensor(interp, id, log_index++);
- }
- close();
- }
-
-private:
- void open(const std::string &path)
- {
- if (!_outfile.is_open())
- _outfile.open(path, std::ios_base::out);
-
- _outfile << "# ------ file: " << path << " ------" << std::endl
- << "tensor_shape_gen = []" << std::endl
- << "tensor_value_gen = []" << std::endl
- << std::endl;
- }
-
- void printTensor(::tflite::Interpreter &interp, const int id, const int log_index)
- {
- const TfLiteTensor *tensor = interp.tensor(id);
-
- _outfile << "# tensor name: " << tensor->name << std::endl;
- _outfile << "# tflite::interpreter.tensor(" << id << ") -> tensor_value_gen[" << log_index
- << "]" << std::endl;
-
- if (tensor->type == kTfLiteInt32)
- {
- printTensorShape(tensor);
- printTensorValue<int32_t>(tensor, tensor->data.i32);
- }
- else if (interp.tensor(id)->type == kTfLiteUInt8)
- {
- printTensorShape(tensor);
- printTensorValue<uint8_t>(tensor, tensor->data.uint8);
- }
- else if (tensor->type == kTfLiteFloat32)
- {
- printTensorShape(tensor);
- printTensorValue<float>(tensor, tensor->data.f);
- }
- }
-
- void printTensorShape(const TfLiteTensor *tensor)
- {
- _outfile << "tensor_shape_gen.append('{";
-
- int r = 0;
- for (; r < tensor->dims->size - 1; r++)
- {
- _outfile << tensor->dims->data[r] << ", ";
- }
- _outfile << tensor->dims->data[r];
-
- _outfile << "}')" << std::endl;
- }
-
- template <typename T> void printTensorValue(const TfLiteTensor *tensor, T *tensor_data_ptr)
- {
- _outfile << "tensor_value_gen.append([";
-
- _outfile << std::fixed << std::setprecision(10);
-
- const T *end = reinterpret_cast<const T *>(tensor->data.raw_const + tensor->bytes);
- for (T *ptr = tensor_data_ptr; ptr < end; ptr++)
- _outfile << *ptr << ", ";
-
- _outfile << "])" << std::endl << std::endl;
- }
-
- void close()
- {
- _outfile << "# --------- tensor shape and value defined above ---------" << std::endl;
- _outfile.close();
- }
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_LOGGER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorUtils.h
- * @brief This file contains utilities function
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_UTILS_H__
-#define __NNFW_TFLITE_TENSOR_UTILS_H__
-
-#include <tensorflow/lite/context.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Get @c true if tensor type is kTfLiteFloat32, otherwise @c false
- * @param[in] tensor The tensor object to be compared
- * @return @c true if tensor type is kTfLiteFloat32, otherwise @c false
- */
-inline bool isFloatTensor(const TfLiteTensor *tensor) { return tensor->type == kTfLiteFloat32; }
-
-/**
- * @brief Get @c true if tensor is 4-D tensor and the first dimension length is 1,
- * otherwise @c false
- * @param[in] tensor The tensor object to be compared
- * @return @c true if tensor is 4-D tensor and the first dimension length is 1, otherwise @c false
- */
-inline bool isFeatureTensor(const TfLiteTensor *tensor)
-{
- return (tensor->dims->size == 4) && (tensor->dims->data[0] == 1);
-}
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_UTILS_H__
#ifndef __NNFW_TFLITE_TENSOR_VIEW_H__
#define __NNFW_TFLITE_TENSOR_VIEW_H__
-#include "tensorflow/lite/interpreter.h"
-
#include "misc/tensor/Shape.h"
#include "misc/tensor/Index.h"
#include "misc/tensor/Reader.h"
#include "misc/tensor/NonIncreasingStride.h"
+#include <tensorflow/lite/c/c_api.h>
+
namespace nnfw
{
namespace tflite
* @param[in] tensor_index The tensor index
* @return The new TensorView<T> object
*/
- static TensorView<T> make(::tflite::Interpreter &interp, int tensor_index)
+ static TensorView<T> make(const TfLiteTensor *tensor)
{
- auto tensor_ptr = interp.tensor(tensor_index);
-
// Set 'shape'
- nnfw::misc::tensor::Shape shape(tensor_ptr->dims->size);
+ // Rank and per-axis extents are read through the TfLite C API accessors
+ const auto rank = TfLiteTensorNumDims(tensor);
+ nnfw::misc::tensor::Shape shape(rank);
for (uint32_t axis = 0; axis < shape.rank(); ++axis)
{
- shape.dim(axis) = tensor_ptr->dims->data[axis];
+ const auto extent = TfLiteTensorDim(tensor, axis);
+ shape.dim(axis) = extent;
}
- return TensorView<T>(shape, interp.typed_tensor<T>(tensor_index));
+ // View the tensor's raw data buffer as an array of T
+ auto *base = reinterpret_cast<T *>(TfLiteTensorData(tensor));
+ return TensorView<T>(shape, base);
}
};
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Builder.h
- * @brief This file contains Builder structure
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_BUILDER_H__
-
-#include <tensorflow/lite/interpreter.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Structure to Builder
- */
-struct Builder
-{
- /**
- * @brief Destroy the Builder object
- */
- virtual ~Builder() = default;
-
- /**
- * @brief Build a FlatBuffer model
- * @return The TfLite interpreter object
- */
- virtual std::unique_ptr<::tflite::Interpreter> build(void) const = 0;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_BUILDER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file FlatBufferBuilder.h
- * @brief This file contains FlatBufferBuilder class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
-
-#include <tensorflow/lite/model.h>
-
-#include "tflite/interp/Builder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define FlatBufferBuilder which is inherited from Builder
- */
-class FlatBufferBuilder final : public Builder
-{
-public:
- /**
- * @brief Construct a FlatBufferBuilder object with FlatBufferModel of TfLite
- * @param[in] model The TfLite Flatbuffer model
- */
- FlatBufferBuilder(const ::tflite::FlatBufferModel &model) : _model{model}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Build a FlatBuffer model
- * @return The TfLite interpreter pointer address
- */
- std::unique_ptr<::tflite::Interpreter> build(void) const override;
-
-private:
- const ::tflite::FlatBufferModel &_model;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file FunctionBuilder.h
- * @brief This file contains FunctionBuilder class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
-
-#include <tensorflow/lite/model.h>
-
-#include "tflite/interp/Builder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define FunctionBuilder which is inherited from Builder
- */
-class FunctionBuilder final : public Builder
-{
-public:
- using SetupFunc = std::function<void(::tflite::Interpreter &)>;
-
-public:
- /**
- * @brief Construct a FunctionBuilder object with SetupFunction
- * @param[in] fn The SetupFunc object
- */
- FunctionBuilder(const SetupFunc &fn) : _fn{fn}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Build a SetupFunc
- * @return The TfLite interpreter pointer address
- */
- std::unique_ptr<::tflite::Interpreter> build(void) const override;
-
-private:
- SetupFunc _fn;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/CopyInputInitializer.h"
-#include "tflite/TensorView.h"
-
-#include <misc/tensor/IndexIterator.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-void CopyInputInitializer::run(::tflite::Interpreter &interp)
-{
- for (const auto &tensor_idx : interp.inputs())
- {
- TfLiteTensor *tensor = interp.tensor(tensor_idx);
- switch (tensor->type)
- {
- case kTfLiteInt32:
- setValue<int32_t>(interp, tensor_idx);
- break;
- case kTfLiteUInt8:
- setValue<uint8_t>(interp, tensor_idx);
- break;
- case kTfLiteInt8:
- setValue<int8_t>(interp, tensor_idx);
- break;
- case kTfLiteBool:
- setValue<bool>(interp, tensor_idx);
- break;
- case kTfLiteFloat32:
- setValue<float>(interp, tensor_idx);
- break;
- default:
- throw std::runtime_error{"Not supported input type"};
- }
- }
-}
-
-template <typename T>
-void CopyInputInitializer::setValue(::tflite::Interpreter &interp, int tensor_idx)
-{
- auto tensor_from_view = nnfw::tflite::TensorView<T>::make(_from, tensor_idx);
- auto tensor_to_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
-
- nnfw::misc::tensor::iterate(tensor_from_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- tensor_to_view.at(ind) = tensor_from_view.at(ind);
- };
-}
-
-} // namespace tflite
-} // namespace nnfw
#include "misc/tensor/Zipper.h"
#include "misc/tensor/Comparator.h"
+#include <tensorflow/lite/c/c_api.h>
+
#include <iostream>
#include <cassert>
#include <map>
-bool TfLiteInterpMatchApp::run(::tflite::Interpreter &interp, ::tflite::Interpreter &nnapi) const
+bool TfLiteInterpMatchApp::run(TfLiteInterpreter &expected, TfLiteInterpreter &obtained) const
{
- assert(interp.outputs() == nnapi.outputs());
+ auto output_count = TfLiteInterpreterGetOutputTensorCount(&expected);
+ assert(output_count == TfLiteInterpreterGetOutputTensorCount(&obtained));
bool all_matched = true;
- using Comparator = std::function<bool(int id, ::tflite::Interpreter &, ::tflite::Interpreter &)>;
+ using Comparator = std::function<bool(int32_t, const TfLiteTensor *, const TfLiteTensor *)>;
std::map<TfLiteType, Comparator> comparators;
- comparators[kTfLiteUInt8] = [this](int id, ::tflite::Interpreter &interp,
- ::tflite::Interpreter &nnapi) {
- const auto expected = nnfw::tflite::TensorView<uint8_t>::make(interp, id);
- const auto obtained = nnfw::tflite::TensorView<uint8_t>::make(nnapi, id);
+ comparators[kTfLiteUInt8] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+ const TfLiteTensor *obtained_tensor) {
+ const auto expected_view = nnfw::tflite::TensorView<uint8_t>::make(expected_tensor);
+ const auto obtained_view = nnfw::tflite::TensorView<uint8_t>::make(obtained_tensor);
- return compareSingleTensorView(expected, obtained, id);
+ return compareSingleTensorView(expected_view, obtained_view, id);
};
- comparators[kTfLiteInt32] = [this](int id, ::tflite::Interpreter &interp,
- ::tflite::Interpreter &nnapi) {
- const auto expected = nnfw::tflite::TensorView<int32_t>::make(interp, id);
- const auto obtained = nnfw::tflite::TensorView<int32_t>::make(nnapi, id);
+ comparators[kTfLiteInt32] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+ const TfLiteTensor *obtained_tensor) {
+ const auto expected_view = nnfw::tflite::TensorView<int32_t>::make(expected_tensor);
+ const auto obtained_view = nnfw::tflite::TensorView<int32_t>::make(obtained_tensor);
- return compareSingleTensorView(expected, obtained, id);
+ return compareSingleTensorView(expected_view, obtained_view, id);
};
- comparators[kTfLiteFloat32] = [this](int id, ::tflite::Interpreter &interp,
- ::tflite::Interpreter &nnapi) {
- const auto expected = nnfw::tflite::TensorView<float>::make(interp, id);
- const auto obtained = nnfw::tflite::TensorView<float>::make(nnapi, id);
+ comparators[kTfLiteFloat32] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+ const TfLiteTensor *obtained_tensor) {
+ const auto expected_view = nnfw::tflite::TensorView<float>::make(expected_tensor);
+ const auto obtained_view = nnfw::tflite::TensorView<float>::make(obtained_tensor);
- return compareSingleTensorView(expected, obtained, id);
+ return compareSingleTensorView(expected_view, obtained_view, id);
};
- comparators[kTfLiteBool] = [this](int id, ::tflite::Interpreter &interp,
- ::tflite::Interpreter &nnapi) {
- const auto expected = nnfw::tflite::TensorView<bool>::make(interp, id);
- const auto obtained = nnfw::tflite::TensorView<bool>::make(nnapi, id);
+ comparators[kTfLiteBool] = [this](int32_t id, const TfLiteTensor *expected_tensor,
+ const TfLiteTensor *obtained_tensor) {
+ const auto expected_view = nnfw::tflite::TensorView<bool>::make(expected_tensor);
+ const auto obtained_view = nnfw::tflite::TensorView<bool>::make(obtained_tensor);
- return compareSingleTensorView(expected, obtained, id);
+ return compareSingleTensorView(expected_view, obtained_view, id);
};
- for (const auto &id : interp.outputs())
+ for (int32_t idx = 0; idx < output_count; idx++)
{
- assert(interp.tensor(id)->type == nnapi.tensor(id)->type);
+ auto const expected_tensor = TfLiteInterpreterGetOutputTensor(&expected, idx);
+ auto const obtained_tensor = TfLiteInterpreterGetOutputTensor(&obtained, idx);
+ auto const tensor_type = TfLiteTensorType(expected_tensor);
+ assert(tensor_type == TfLiteTensorType(obtained_tensor));
- auto it = comparators.find(interp.tensor(id)->type);
+ auto it = comparators.find(tensor_type);
if (it == comparators.end())
{
const auto &comparator = it->second;
- if (!comparator(id, interp, nnapi))
+ if (!comparator(idx, expected_tensor, obtained_tensor))
{
all_matched = false;
}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/FeatureView.h"
-#include "tflite/TensorUtils.h"
-
-#include <cassert>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-nnfw::misc::feature::Shape getFeatureShape(const TfLiteTensor *tensor)
-{
- nnfw::misc::feature::Shape shape{tensor->dims->data[3], tensor->dims->data[1],
- tensor->dims->data[2]};
-
- return shape;
-}
-
-FeatureView<float>::FeatureView(::tflite::Interpreter &interp, const InputIndex &index)
-{
- const auto tensor_index = interp.inputs().at(index.asInt());
- auto tensor_ptr = interp.tensor(tensor_index);
-
- assert(isFloatTensor(tensor_ptr));
- assert(isFeatureTensor(tensor_ptr));
-
- _shape = getFeatureShape(tensor_ptr);
- _base = interp.typed_tensor<float>(tensor_index);
-}
-
-FeatureView<float>::FeatureView(::tflite::Interpreter &interp, const OutputIndex &index)
-{
- const auto tensor_index = interp.outputs().at(index.asInt());
- auto tensor_ptr = interp.tensor(tensor_index);
-
- assert(isFloatTensor(tensor_ptr));
- assert(isFeatureTensor(tensor_ptr));
-
- _shape = getFeatureShape(tensor_ptr);
- _base = interp.typed_tensor<float>(tensor_index);
-}
-
-float FeatureView<float>::at(uint32_t ch, uint32_t row, uint32_t col) const
-{
- return *(_base + getElementOffset(ch, row, col));
-}
-
-float &FeatureView<float>::at(uint32_t ch, uint32_t row, uint32_t col)
-{
- return *(_base + getElementOffset(ch, row, col));
-}
-
-} // namespace tflite
-} // namespace nnfw
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/OutputResetter.h"
-#include "tflite/TensorView.h"
-
-#include <misc/tensor/IndexIterator.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-void OutputResetter::run(::tflite::Interpreter &interp)
-{
- for (const auto &tensor_idx : interp.outputs())
- {
- TfLiteTensor *tensor = interp.tensor(tensor_idx);
- switch (tensor->type)
- {
- case kTfLiteInt32:
- resetValue<int32_t>(interp, tensor_idx);
- break;
- case kTfLiteUInt8:
- resetValue<uint8_t>(interp, tensor_idx);
- break;
- case kTfLiteInt8:
- resetValue<int8_t>(interp, tensor_idx);
- break;
- case kTfLiteBool:
- resetValue<bool>(interp, tensor_idx);
- break;
- case kTfLiteFloat32:
- resetValue<float>(interp, tensor_idx);
- break;
- default:
- throw std::runtime_error{"Not supported output type"};
- }
- }
-}
-
-template <typename T> void OutputResetter::resetValue(::tflite::Interpreter &interp, int tensor_idx)
-{
- auto tensor_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) { tensor_view.at(ind) = 0; };
-}
-
-} // namespace tflite
-} // namespace nnfw
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/Quantization.h"
-
-TfLiteQuantizationParams make_default_quantization(void)
-{
- return TfLiteQuantizationParams{0.0f, 0};
-}
{
namespace tflite
{
+namespace
+{
+
+/**
+ * @brief Assign a freshly generated value to every element of a tensor
+ *
+ * @tparam T       Element type requested from the generator and stored through the view
+ * @param randgen  Generator queried once per visited index via generate<T>()
+ * @param tensor   Tensor wrapped in a TensorView<T>; elements are assigned through that view
+ */
+template <typename T>
+void setValue(nnfw::misc::RandomGenerator &randgen, const TfLiteTensor *tensor)
+{
+  auto view = nnfw::tflite::TensorView<T>::make(tensor);
+
+  // Visitor invoked for each index produced while iterating over the view's shape
+  auto assign_random = [&](const nnfw::misc::tensor::Index &index) {
+    view.at(index) = randgen.generate<T>();
+  };
+
+  nnfw::misc::tensor::iterate(view.shape()) << assign_random;
+}
-void RandomInputInitializer::run(::tflite::Interpreter &interp)
+} // namespace
+
+void RandomInputInitializer::run(TfLiteInterpreter &interp)
{
- for (const auto &tensor_idx : interp.inputs())
+ const auto input_count = TfLiteInterpreterGetInputTensorCount(&interp);
+ for (int32_t idx = 0; idx < input_count; idx++)
{
- TfLiteTensor *tensor = interp.tensor(tensor_idx);
- switch (tensor->type)
+ auto tensor = TfLiteInterpreterGetInputTensor(&interp, idx);
+ auto const tensor_type = TfLiteTensorType(tensor);
+ switch (tensor_type)
{
case kTfLiteFloat32:
- setValue<float>(interp, tensor_idx);
+ setValue<float>(_randgen, tensor);
break;
case kTfLiteInt32:
- setValue<int32_t>(interp, tensor_idx);
+ setValue<int32_t>(_randgen, tensor);
break;
case kTfLiteUInt8:
- setValue<uint8_t>(interp, tensor_idx);
+ setValue<uint8_t>(_randgen, tensor);
break;
case kTfLiteBool:
- setValue<bool>(interp, tensor_idx);
+ setValue<bool>(_randgen, tensor);
break;
case kTfLiteInt8:
- setValue<int8_t>(interp, tensor_idx);
+ setValue<int8_t>(_randgen, tensor);
break;
default:
throw std::runtime_error{"Not supported input type"};
}
}
-template <typename T>
-void RandomInputInitializer::setValue(::tflite::Interpreter &interp, int tensor_idx)
-{
- auto tensor_view = nnfw::tflite::TensorView<T>::make(interp, tensor_idx);
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) { tensor_view.at(ind) = _randgen.generate<T>(); };
-}
-
} // namespace tflite
} // namespace nnfw
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/CopyInputInitializer.h"
-#include "tflite/OutputResetter.h"
-#include "tflite/RandomInputInitializer.h"
-#include "tflite/RandomTestRunner.h"
-#include "tflite/Diff.h"
-#include "tflite/TensorLogger.h"
-
-#include <misc/tensor/IndexIterator.h>
-#include <misc/tensor/Object.h>
-#include <misc/EnvVar.h>
-#include <misc/fp32.h>
-
-#include <cassert>
-#include <map>
-#include <functional>
-#include <iostream>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-using namespace std::placeholders;
-
-void RandomTestRunner::compile(const nnfw::tflite::Builder &builder)
-{
- _tfl_interp = builder.build();
- _nnapi = builder.build();
-
- _tfl_interp->UseNNAPI(false);
- _nnapi->UseNNAPI(true);
-
- // Allocate Tensors
- _tfl_interp->AllocateTensors();
- _nnapi->AllocateTensors();
-}
-
-int RandomTestRunner::run(size_t running_count)
-{
- assert(_tfl_interp->inputs() == _nnapi->inputs());
- assert(_tfl_interp->outputs() == _nnapi->outputs());
-
- nnfw::tflite::OutputResetter resetter;
- resetter.run(*(_tfl_interp.get()));
-
- RandomInputInitializer initializer{_randgen};
- initializer.run(*(_tfl_interp.get()));
-
- std::cout << "[NNAPI TEST] Run T/F Lite Interpreter without NNAPI" << std::endl;
- _tfl_interp->Invoke();
-
- for (size_t i = 1; i <= running_count; ++i)
- {
- resetter.run(*(_nnapi.get()));
-
- CopyInputInitializer copy_initializer{*(_tfl_interp.get())};
- copy_initializer.run(*(_nnapi.get()));
-
- std::cout << "[NNAPI TEST #" << i << "] Run T/F Lite Interpreter with NNAPI" << std::endl;
-
- if (_nnapi->Invoke() != kTfLiteOk)
- {
- throw std::runtime_error{"Failed to Run T/F Lite Interpreter with NNAPI"};
- }
-
- // Compare OFM
- std::cout << "[NNAPI TEST #" << i << "] Compare the result" << std::endl;
-
- const auto tolerance = _param.tolerance;
-
- auto equals = [tolerance](float lhs, float rhs) {
- // NOTE Hybrid approach
- // TODO Allow users to set tolerance for absolute_epsilon_equal
- if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs))
- {
- return true;
- }
-
- return nnfw::misc::fp32::epsilon_equal(lhs, rhs, tolerance);
- };
-
- nnfw::misc::tensor::Comparator comparator(equals);
- TfLiteInterpMatchApp app(comparator);
-
- app.verbose() = _param.verbose;
-
- bool res = app.run(*_tfl_interp, *_nnapi);
-
- if (!res)
- {
- return 255;
- }
-
- std::cout << "[NNAPI TEST #" << i << "] PASSED" << std::endl << std::endl;
-
- if (_param.tensor_logging)
- nnfw::tflite::TensorLogger::get().save(_param.log_path, *_tfl_interp);
- }
-
- return 0;
-}
-
-RandomTestRunner RandomTestRunner::make(uint32_t seed)
-{
- RandomTestParam param;
-
- param.verbose = nnfw::misc::EnvVar("VERBOSE").asInt(0);
- param.tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
- param.tensor_logging = nnfw::misc::EnvVar("TENSOR_LOGGING").asBool(false);
- param.log_path = nnfw::misc::EnvVar("TENSOR_LOGGING").asString("tensor_log.txt");
-
- return RandomTestRunner{seed, param};
-}
-
-} // namespace tflite
-} // namespace nnfw
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/interp/FlatBufferBuilder.h"
-
-#include <tensorflow/lite/kernels/register.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-std::unique_ptr<::tflite::Interpreter> FlatBufferBuilder::build(void) const
-{
- std::unique_ptr<::tflite::Interpreter> interpreter;
-
- ::tflite::ops::builtin::BuiltinOpResolver resolver;
-
- ::tflite::InterpreterBuilder builder(_model, resolver);
-
- builder(&interpreter);
-
- return interpreter;
-}
-
-} // namespace tflite
-} // namespace nnfw
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/interp/FunctionBuilder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-std::unique_ptr<::tflite::Interpreter> FunctionBuilder::build(void) const
-{
- auto res = std::unique_ptr<::tflite::Interpreter>{new ::tflite::Interpreter};
-
- _fn(*res);
-
- return res;
-}
-
-} // namespace tflite
-} // namespace nnfw
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01001500
+#define NNFW_VERSION 0x01001600
#endif // __NNFW_VERSION_H__
#include "nnfw_api_internal.h"
#include "CustomKernelRegistry.h"
-#include "compiler/Compiler.h"
+#include "compiler/CompilerFactory.h"
#include "util/ConfigSource.h"
#include "util/Exceptions.h"
#include "util/logging.h"
{
if (session == nullptr)
return NNFW_STATUS_UNEXPECTED_NULL;
-
- // Create session
- *session = new (std::nothrow) nnfw_session();
- if (*session == nullptr)
+ try
{
- std::cerr << "Error during session creation" << std::endl;
- return NNFW_STATUS_OUT_OF_MEMORY;
+ auto new_session = std::unique_ptr<nnfw_session>(new nnfw_session());
+ new_session->_kernel_registry = std::make_shared<onert::api::CustomKernelRegistry>();
+ *session = new_session.release();
}
-
- // Initialize fields
- try
+ catch (const std::bad_alloc &e)
{
- (*session)->_kernel_registry = std::make_shared<onert::api::CustomKernelRegistry>();
+ std::cerr << "Error during session creation" << std::endl;
+ *session = nullptr; // Set nullptr on error to keep the old behavior
+ return NNFW_STATUS_OUT_OF_MEMORY;
}
catch (const std::exception &e)
{
std::cerr << "Error during session initialization : " << e.what() << std::endl;
- delete *session;
- *session = nullptr;
-
+ *session = nullptr; // Set nullptr on error to keep the old behavior
return NNFW_STATUS_ERROR;
}
-
return NNFW_STATUS_NO_ERROR;
}
std::string manifest_file_name = package_path + "/metadata/MANIFEST";
std::ifstream mfs(manifest_file_name);
- _package_file_path = package_path;
// extract the filename of the first(index 0) model
// e.g. In MANIFEST file, { "models" : [ "firstmodel.tflite", "2nd.tflite" ] }
Json::Value root;
}
}
_nnpkg = std::make_shared<onert::ir::NNPkg>();
- for (uint32_t i = 0; i < models.size(); ++i)
+ auto num_models = models.size();
+ if (num_models == 0 || (num_models - 1) > onert::ir::ModelIndex::max())
+ {
+ std::cerr << "Invalid model size - " << std::to_string(num_models) << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ for (uint16_t i = 0; i < num_models; ++i)
{
auto model_file_path = package_path + std::string("/") + models[i].asString();
auto model_type = model_types[i].asString();
for (uint32_t j = 0; j < tos.size(); ++j)
_nnpkg->addEdge(toIODesc(fromtos[i]["from"].asString()), toIODesc(tos[j].asString()));
}
+
+ _nnpkg->verify();
_state = State::MODEL_LOADED;
}
catch (const std::exception &e)
try
{
- // TODO: Compile all models in case of multiple models
- if (_nnpkg->model_count() > 2)
- {
- std::cerr << "Error during model prepare : more than 3 multiple models are not supported yet."
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
- auto compiler = std::make_unique<onert::compiler::Compiler>(_nnpkg, _coptions);
+ auto compiler = onert::compiler::CompilerFactory::get().create(_nnpkg, _coptions);
_nnpkg.reset();
_compiler_artifact = compiler->compile();
_execution = std::make_unique<onert::exec::Execution>(_compiler_artifact->_executors);
return NNFW_STATUS_NO_ERROR;
}
-NNFW_STATUS nnfw_session::prepare_pipeline(const char *map_file_path)
+NNFW_STATUS nnfw_session::prepare_pipeline(const char *)
{
- // NOTE. If users want to run prepare_pipeline() more than one time, this could be removed.
- if (!isStateModelLoaded())
- {
- std::cerr << "Error during model prepare pipeline : ";
- if (isStateInitialized())
- {
- std::cerr << "prepare_pipeline should be run once";
- }
- else
- {
- std::cerr << "invalid state";
- }
- std::cerr << std::endl;
- return NNFW_STATUS_INVALID_STATE;
- }
-
- try
- {
- auto model = _nnpkg->primary_model();
- auto compiler = std::make_unique<onert::compiler::Compiler>(model, *_coptions[0]);
- _nnpkg.reset();
- auto artifacts = compiler->compile(_package_file_path.c_str(), map_file_path);
-
- for (auto it = artifacts.begin(); it != artifacts.end(); ++it)
- {
- _executions.push_back(std::make_shared<onert::exec::Execution>(it->get()->_executors));
- }
- make_dependency();
- _threads.resize(_executions.size());
- for (uint32_t i = 0; i < _threads.size(); i++)
- {
- _threads[i] = std::thread(&onert::exec::Execution::runInference, _executions[i].get());
- }
- }
- catch (const std::exception &e)
- {
- std::cerr << "Error during model prepare : " << e.what() << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
- _state = State::PREPARED;
- return NNFW_STATUS_NO_ERROR;
+ std::cerr << "Pipeline prepare_pipeline: deprecated feature " << std::endl;
+ return NNFW_STATUS_ERROR;
}
NNFW_STATUS nnfw_session::run()
return NNFW_STATUS_INVALID_STATE;
}
- if (!_executions.empty())
- {
- std::cerr << "Error during nnfw_session::run : not supported for pipeline run" << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
_execution->execute();
return NNFW_STATUS_INVALID_STATE;
}
- if (!_executions.empty())
- {
- std::cerr << "Error during nnfw_session::run_async : not supported for pipeline run"
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
_execution->startExecute();
_state = State::RUNNING;
return NNFW_STATUS_ERROR;
}
- if (!_executions.empty())
- {
- std::cerr << "Error during nnfw_session::await : not supported for pipeline run" << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
_execution->waitFinish();
_state = State::FINISHED_RUN;
return NNFW_STATUS_ERROR;
}
- if (!_executions.empty())
- {
- std::cerr << "Error during nnfw_session::set_input : not supported for pipeline run"
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
_execution->setInput(onert::ir::IOIndex(index), buffer, length);
return NNFW_STATUS_ERROR;
}
- if (!_executions.empty())
- {
- std::cerr << "Error during nnfw_session::set_output : not supported for pipeline run"
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
_execution->setOutput(onert::ir::IOIndex(index), buffer, length);
std::cerr << "Error during nnfw_session::input_size, number is null pointer." << std::endl;
return NNFW_STATUS_UNEXPECTED_NULL;
}
- *number = primary_subgraph()->getInputs().size();
+ *number = getInputSize();
}
catch (const std::exception &e)
{
std::cerr << "Error during nnfw_session::output_size, number is null pointer." << std::endl;
return NNFW_STATUS_UNEXPECTED_NULL;
}
- *number = primary_subgraph()->getOutputs().size();
+ *number = getOutputSize();
}
catch (const std::exception &e)
{
NNFW_STATUS nnfw_session::set_input_layout(uint32_t index, NNFW_LAYOUT layout)
{
+  // The layout is forwarded to _execution, which only exists once the session has been
+  // prepared, so any other state is rejected up front.
+  if (!isStatePreparedOrFinishedRun())
+  {
+    // Name this API in the diagnostic (the text previously referred to `run`)
+    std::cerr << "Error during nnfw_session::set_input_layout : "
+              << "set_input_layout should be run after prepare" << std::endl;
+    return NNFW_STATUS_INVALID_STATE;
+  }
+
try
{
if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
std::cerr << "Error during nnfw_session::set_input_layout, not supported layout" << std::endl;
return NNFW_STATUS_ERROR;
}
- if (_execution)
- {
- _execution->setInputLayout(onert::ir::IOIndex(index), convertLayout(layout));
- }
- else
- {
- _executions.at(0)->setInputLayout(onert::ir::IOIndex(index), convertLayout(layout));
- }
+
+ _execution->setInputLayout(onert::ir::IOIndex(index), convertLayout(layout));
}
catch (const std::exception &e)
{
NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
{
+  // The layout is forwarded to _execution, which only exists once the session has been
+  // prepared, so any other state is rejected up front.
+  if (!isStatePreparedOrFinishedRun())
+  {
+    // Name this API in the diagnostic (the text previously referred to `run`)
+    std::cerr << "Error during nnfw_session::set_output_layout : "
+              << "set_output_layout should be run after prepare" << std::endl;
+    return NNFW_STATUS_INVALID_STATE;
+  }
+
try
{
if (layout != NNFW_LAYOUT_NONE && layout != NNFW_LAYOUT_CHANNELS_FIRST &&
<< std::endl;
return NNFW_STATUS_ERROR;
}
- if (_execution)
- {
- _execution->setOutputLayout(onert::ir::IOIndex(index), convertLayout(layout));
- }
- else
- {
- _executions.at(_executions.size() - 1)
- ->setOutputLayout(onert::ir::IOIndex(index), convertLayout(layout));
- }
+
+ _execution->setOutputLayout(onert::ir::IOIndex(index), convertLayout(layout));
}
catch (const std::exception &e)
{
if (!isStatePreparedOrFinishedRun())
{
- // In this case, if we apply input shape in primary_subgraph, it will propagate after
- // compilation and excution
- auto model = _nnpkg->primary_model();
- auto primary_subgraph = model->primary_subgraph();
- auto ind = primary_subgraph->getInputs().at(index);
- auto &input = primary_subgraph->operands().at(ind);
- // overwrite input shape with the shape from ti
- input.info().shape(new_shape);
+ // In this case, if we apply input shape, it will propagate after compilation and excution
+ auto &info = _nnpkg->inputInfo(index);
+ info.shape(new_shape);
}
else // when called after nnfw_session::prepare()
- {
- if (_execution)
- {
- _execution->changeInputShape(onert::ir::IOIndex(index), new_shape);
- }
- else
- {
- _executions.at(0)->changeInputShape(onert::ir::IOIndex(index), new_shape);
- }
- }
+ _execution->changeInputShape(onert::ir::IOIndex(index), new_shape);
return NNFW_STATUS_NO_ERROR;
}
<< std::endl;
return NNFW_STATUS_UNEXPECTED_NULL;
}
- if (index >= primary_subgraph()->getInputs().size())
+
+ if (index >= getInputSize())
{
std::cerr << "Error during nnfw_session::input_tensorinfo, index is out of range."
<< std::endl;
return NNFW_STATUS_ERROR;
}
- auto opidx = primary_subgraph()->getInputs().at(index);
- auto shape = primary_subgraph()->operands().at(opidx).shape();
- if (isStatePreparedOrFinishedRun())
+
+ if (isStateModelLoaded())
+ {
+ auto info = _nnpkg->inputInfo(index);
+ fillTensorInfo(ti, info.shape(), info.typeInfo().type());
+ }
+ else
{
- shape = _execution ? _execution->getInputShape(onert::ir::IOIndex{index})
- : _executions.at(0)->getInputShape(onert::ir::IOIndex{index});
+ auto io_index = onert::ir::IOIndex{index};
+ auto shape = _execution->getInputShape(io_index);
+ auto dtype = _compiler_artifact->_executors->inputInfo(io_index).typeInfo().type();
+ fillTensorInfo(ti, shape, dtype);
}
- auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type();
- fillTensorInfo(ti, shape, dtype);
}
catch (const std::exception &e)
{
return NNFW_STATUS_UNEXPECTED_NULL;
}
- if (index >= primary_subgraph()->getOutputs().size())
- {
- std::cerr << "Error during nnfw_session::output_tensorinfo, index is out of range."
- << std::endl;
- return NNFW_STATUS_ERROR;
- }
-
try
{
- auto opidx = primary_subgraph()->getOutputs().at(index);
- auto shape = primary_subgraph()->operands().at(opidx).shape();
- // If it is called after `nnfw_run` then get the shape from Execution, not from the graph
- if (isStateFinishedRun())
+ if (index >= getOutputSize())
+ {
+ std::cerr << "Error during nnfw_session::output_tensorinfo, index is out of range."
+ << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+
+ if (isStateModelLoaded())
{
- shape = _execution
- ? _execution->getOutputShape(onert::ir::IOIndex{index})
- : _executions.at(_executions.size() - 1)->getOutputShape(onert::ir::IOIndex{index});
+ auto info = _nnpkg->outputInfo(index);
+ fillTensorInfo(ti, info.shape(), info.typeInfo().type());
+ }
+ else
+ {
+ auto io_index = onert::ir::IOIndex{index};
+ auto shape = _execution->getOutputShape(io_index);
+ auto dtype = _compiler_artifact->_executors->outputInfo(io_index).typeInfo().type();
+ fillTensorInfo(ti, shape, dtype);
}
- auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type();
- fillTensorInfo(ti, shape, dtype);
}
catch (const std::exception &e)
{
return NNFW_STATUS_NO_ERROR;
}
-void nnfw_session::make_dependency()
+NNFW_STATUS nnfw_session::push_pipeline_input(std::vector<void *> *, std::vector<uint32_t> *)
{
- for (uint32_t out_exe = 0; out_exe < _executions.size(); out_exe++)
- {
- auto &out_graph = _executions[out_exe]->primary_subgraph();
- for (uint32_t in_exe = 0; in_exe < _executions.size(); in_exe++)
- {
- if (out_exe == in_exe)
- continue;
- auto &in_graph = _executions[in_exe]->primary_subgraph();
- for (auto out = out_graph._name_to_output_begin(); out != out_graph._name_to_output_end();
- out++)
- {
- auto out_opidx = out_graph.getOutputs().at(out->second);
- auto out_shape = out_graph.operands().at(out_opidx).shape();
- for (auto in = in_graph._name_to_input_begin(); in != in_graph._name_to_input_end(); in++)
- {
- if (out->first != in->first)
- continue;
-
- auto in_opidx = in_graph.getInputs().at(in->second);
- auto in_shape = in_graph.operands().at(in_opidx).shape();
- if (out_shape.rank() != in_shape.rank())
- continue;
-
- bool is_same = true;
- for (int32_t i = 0; i < out_shape.rank(); i++)
- {
- if (out_shape.dim(i) != in_shape.dim(i))
- {
- is_same = false;
- break;
- }
- }
-
- if (is_same)
- _executions[out_exe]->pushNextExe(_executions[in_exe], out->second, in->second);
- }
- }
- }
- }
-}
-
-NNFW_STATUS nnfw_session::push_pipeline_input(std::vector<void *> *inputs,
- std::vector<uint32_t> *lengths)
-{
- static uint32_t count = 0;
- if (inputs->empty())
- {
- _executions[0]->setFinish();
- for (uint32_t i = 0; i < _threads.size(); i++)
- {
- _threads[i].join();
- }
- return NNFW_STATUS_NO_ERROR;
- }
- _executions[0]->asyncIoDescSemWait();
- _executions[0]->createNewAsyncDesc(count++);
- for (uint32_t i = 0; i < inputs->size(); i++)
- {
- _executions[0]->executeAsyncInput(onert::ir::IOIndex(i), inputs->at(i), lengths->at(i));
- }
- _executions[0]->asyncIoDescSemPost();
- return NNFW_STATUS_NO_ERROR;
+ std::cerr << "Pipeline push_pipeline_input: deprecated feature " << std::endl;
+ return NNFW_STATUS_ERROR;
}
-NNFW_STATUS nnfw_session::pop_pipeline_output(std::vector<void *> *outputs)
+NNFW_STATUS nnfw_session::pop_pipeline_output(std::vector<void *> *)
{
- auto results = _executions[_executions.size() - 1]->getAsyncResults();
- while (results->empty())
- {
- if (_executions[_executions.size() - 1]->stopWait())
- return NNFW_STATUS_ERROR;
- }
-
- auto result = results->front();
- results->pop_front();
- for (uint32_t i = 0; i < result.size(); i++)
- outputs->push_back(result[i]);
- return NNFW_STATUS_NO_ERROR;
+ std::cerr << "Pipeline pop_pipeline_output: deprecated feature " << std::endl;
+ return NNFW_STATUS_ERROR;
}
NNFW_STATUS nnfw_session::register_custom_operation(const std::string &id,
{
options.he_profiling_mode = toBool(value);
}
- else if (skey == config::DISABLE_COMPILE)
- {
- options.disable_compile = toBool(value);
- }
else
{
return NNFW_STATUS_ERROR;
{
if (_nnpkg != nullptr)
{
- assert(_execution == nullptr && _executions.empty());
+ assert(_execution == nullptr);
return _nnpkg->primary_model()->primary_subgraph().get();
}
else
{
- assert(_execution != nullptr || !_executions.empty());
- // TODO Remove const_cast
+ assert(_execution != nullptr);
// We assumed the graph will not change after compilation, but shape could change
- if (!_executions.empty())
- {
- return &_executions[0]->primary_parentgraph();
- }
-
return &_execution->primary_subgraph();
}
}
+/**
+ * @brief Number of inputs exposed by the session in its current state
+ *
+ * @return Input count taken from the loaded package while the model is only loaded,
+ *         otherwise from the executors held by the compiler artifact
+ * @throw std::runtime_error when no model has been loaded yet
+ */
+uint32_t nnfw_session::getInputSize()
+{
+  // Nothing to count before a model is loaded
+  if (isStateInitialized())
+  {
+    throw std::runtime_error{"Model is not loaded yet"};
+  }
+
+  // Loaded-but-not-prepared sessions answer from the package;
+  // every later state (general inference) answers from the executors
+  return isStateModelLoaded() ? _nnpkg->inputSize() : _compiler_artifact->_executors->inputSize();
+}
+
+/**
+ * @brief Number of outputs exposed by the session in its current state
+ *
+ * @return Output count taken from the loaded package while the model is only loaded,
+ *         otherwise from the executors held by the compiler artifact
+ * @throw std::runtime_error when no model has been loaded yet
+ */
+uint32_t nnfw_session::getOutputSize()
+{
+  // Nothing to count before a model is loaded
+  if (isStateInitialized())
+  {
+    throw std::runtime_error{"Model is not loaded yet"};
+  }
+
+  // Loaded-but-not-prepared sessions answer from the package;
+  // every later state (general inference) answers from the executors
+  return isStateModelLoaded() ? _nnpkg->outputSize() : _compiler_artifact->_executors->outputSize();
+}
+
NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_size)
{
if (!isStateModelLoaded())
{
assert(_nnpkg == nullptr);
assert(_coptions.empty());
- assert(_execution == nullptr && _executions.empty());
+ assert(_execution == nullptr);
return true;
}
else
{
assert(_nnpkg != nullptr);
assert(!_coptions.empty());
- assert(_execution == nullptr && _executions.empty());
+ assert(_execution == nullptr);
return true;
}
else
{
assert(_nnpkg == nullptr);
assert(!_coptions.empty());
- assert(_execution != nullptr || !_executions.empty());
+ assert(_execution != nullptr);
return true;
}
else
{
assert(_nnpkg == nullptr);
assert(!_coptions.empty());
- assert(_execution != nullptr || !_executions.empty());
+ assert(_execution != nullptr);
return true;
}
return false;
{
assert(_nnpkg == nullptr);
assert(!_coptions.empty());
- assert(_execution != nullptr || !_executions.empty());
+ assert(_execution != nullptr);
return true;
}
else
NNFW_STATUS set_available_backends(const char *backends);
NNFW_STATUS set_op_backend(const char *op, const char *backend);
- // accessor
- std::vector<std::shared_ptr<onert::exec::Execution>> *get_executions() { return &_executions; }
-
//
// Internal-only API
//
//
// Experimental API
//
- void make_dependency();
NNFW_STATUS push_pipeline_input(std::vector<void *> *inputs, std::vector<uint32_t> *lengths);
NNFW_STATUS pop_pipeline_output(std::vector<void *> *outputs);
private:
const onert::ir::Graph *primary_subgraph();
+ uint32_t getInputSize();
+ uint32_t getOutputSize();
+
bool isStateInitialized();
bool isStateModelLoaded();
bool isStatePrepared();
std::unique_ptr<onert::exec::Execution> _execution;
std::shared_ptr<onert::api::CustomKernelRegistry> _kernel_registry;
std::vector<std::thread> _threads;
- std::vector<std::shared_ptr<onert::exec::Execution>> _executions;
- std::string _package_file_path;
};
#endif // __API_NNFW_API_INTERNAL_H__
auto output_tensor = _tensor_reg->getAclTensor(ofm_index);
std::vector<const ::arm_compute::ICLTensor *> input_tensors;
- for (auto &ifm_ind : input_indexes)
+ for (const auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
auto &offset = parent_info.coordinates;
auto frontend_layout = parent_info.frontend_layout;
- assert(obj.shape().rank() <= ir::Shape::MAX_RANK);
+ assert(obj.shape().rank() <= ir::Shape::kMaxRank);
auto shape = obj.shape();
if (_operands.at(parent_index).shape().rank() >= 4 && frontend_layout == ir::Layout::NHWC &&
backend_layout == ir::Layout::NCHW)
{
auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
- for (auto &entry : lifetime_map)
+ for (const auto &entry : lifetime_map)
{
- auto &use = entry.second;
- auto use_type = use.first;
- auto use_index = use.second;
+ const auto &use = entry.second;
+ const auto &use_type = use.first;
+ const auto &use_index = use.second;
assert(use_index.valid());
if (use_type == UsesType::FIRST)
_tensor_mgr->startLifetime(use_index);
assert(_tensor_mgr->nonconstTensors().size() == 0);
// Normal tensors
- for (auto &entry : _tensor_info_map)
+ for (const auto &entry : _tensor_info_map)
{
- auto ind = entry.first;
+ const auto &ind = entry.first;
if (_parent_map.count(ind) > 0)
continue;
assert(_tensor_mgr->nonconstSubtensors().size() == 0);
// TODO Iterate `_parent_map` instead, once the optimizer bug is fixed
// `Optimizer` iterates the entire Operations, so there is a bug if iterating _parent_map
- for (auto &entry : _tensor_info_map)
+ for (const auto &entry : _tensor_info_map)
{
- auto ind = entry.first;
+ const auto &ind = entry.first;
if (_parent_map.count(ind) == 0)
continue;
bool AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::areSubTensorsOf(
const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq)
{
- for (auto &cand : seq)
+ for (const auto &cand : seq)
{
if (!isSubTensorOf(parent, cand))
{
.operands()
.iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) {
.operands()
.iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
const auto output_index{node.getOutputs().at(0)};
std::vector<const IPortableTensor *> input_tensors;
- for (auto &input_idx : node.getInputs())
+ for (const auto &input_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(input_idx));
auto output_tensor = _tensor_reg->getPortableTensor(output_index);
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
+ for (const auto &ifm_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
auto fn = std::make_unique<ops::ConcatLayer>();
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
+ for (const auto &ifm_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
const auto equation = node.param().equation;
auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
std::vector<custom::TypeInfo> &types,
std::vector<IPortableTensor *> &tensors) {
- for (auto &idx : opSeq)
+ for (const auto &idx : opSeq)
{
const auto &operand = _ctx.at(idx);
// TODO make sure using `_current_layout` is correct for custom operations
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
+ for (const auto &ifm_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
auto fn = std::make_unique<ops::PackLayer>();
auto input_tensor = _tensor_reg->getPortableTensor(input_index);
std::vector<IPortableTensor *> output_tensors;
- for (auto &output_idx : node.getOutputs())
+ for (const auto &output_idx : node.getOutputs())
output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::UnpackLayer>();
auto axis_tensor = _tensor_reg->getPortableTensor(axis_idx);
std::vector<IPortableTensor *> out_tensors;
- for (auto &output_idx : node.getOutputs())
+ for (const auto &output_idx : node.getOutputs())
out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::SplitLayer>();
auto output_tensor = _tensor_reg->getPortableTensor(ofm_index);
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
+ for (const auto &ifm_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
const auto epsilon = node.param().epsilon;
auto in_split_dim = _tensor_reg->getPortableTensor(split_dim);
std::vector<IPortableTensor *> out_tensors;
- for (auto &output_idx : node.getOutputs())
+ for (const auto &output_idx : node.getOutputs())
out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx));
auto fn = std::make_unique<ops::SplitVLayer>();
#include "TensorBuilder.h"
#include "tensorflow/lite/delegates/gpu/cl/environment.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace onert
{
return nullptr;
}
- auto tm = createTensorManager(&environment->context());
- auto tr = std::make_shared<TensorRegistry>(tm);
-
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info;
- create_info.precision = tflite::gpu::cl::CalculationsPrecision::F32;
+ tflite::gpu::CreateGpuModelInfo create_info;
+ create_info.precision = tflite::gpu::CalculationsPrecision::F32;
create_info.storage_type =
tflite::gpu::cl::GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo());
- create_info.hints.Add(tflite::gpu::cl::ModelHints::kFastestInference);
+ create_info.hints.Add(tflite::gpu::ModelHints::kFastestInference);
+
+ auto tm = createTensorManager(&environment->context(), create_info, environment);
+
+ auto tr = std::make_shared<TensorRegistry>(tm);
auto cc = std::make_shared<tflite::gpu::cl::CreationContext>();
cc->device = environment->GetDevicePtr();
cc->queue = environment->queue();
cc->cache = environment->program_cache();
- auto tb = std::make_shared<TensorBuilder>(operands, tm, create_info, environment);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
context->tensor_builder = tb;
return tensor_registry.get();
}
+/**
+ * @brief Generate, register and prepare the kernels of this backend context
+ *
+ * Steps, in order: generate one function sequence per operation in _data.op_order,
+ * hand the resulting map to the kernel generator, allocate tensors, release operand
+ * data held by the graph, then call prepare() on every generated function.
+ *
+ * @return Map of operation index to its generated function sequence
+ */
+FunctionMap BackendContext::genKernels()
+{
+  FunctionMap kernels;
+
+  // One function sequence per operation, in the order given by op_order
+  for (const auto &index : _data.op_order)
+  {
+    kernels.emplace_back(index, kernel_gen->generate(index));
+  }
+
+  kernel_gen->get_operation(kernels);
+  tensor_builder->allocate();
+
+  // NOTE For memory optimization, we want to free some operand data
+  auto &operands = const_cast<ir::Graph &>(*_data.graph).operands();
+  operands.iterate([](const ir::OperandIndex &, ir::Operand &operand) { operand.releaseData(); });
+
+  // Every generated function gets its prepare() call before the map is handed back
+  for (auto &&entry : kernels)
+  {
+    entry.second->iterate([](exec::IFunction &fn) { fn.prepare(); });
+  }
+
+  return kernels;
+}
+
} // namespace gpu_cl
} // namespace backend
} // namespace onert
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
#include "TensorBuilder.h"
+
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
namespace onert
}
ITensorRegistry *genTensors() override;
+ FunctionMap genKernels() override;
protected:
void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
return()
endif(NOT Fp16_FOUND)
-nnas_find_package(TensorFlowGpu QUIET)
+# Each of the following source packages is optional (QUIET): when one is missing this
+# CMake file returns early instead of failing the configure step.
+nnas_find_package(VulkanSource QUIET)
+if(NOT VulkanSource_FOUND)
+ return()
+endif(NOT VulkanSource_FOUND)
+
+nnas_find_package(Opengl_HeadersSource QUIET)
+if(NOT Opengl_HeadersSource_FOUND)
+ return()
+endif(NOT Opengl_HeadersSource_FOUND)
+
+nnas_find_package(Egl_HeadersSource QUIET)
+if(NOT Egl_HeadersSource_FOUND)
+ return()
+endif(NOT Egl_HeadersSource_FOUND)
+
+# FlatBuffers is looked up (and required) only for non-Tizen targets.
+# NOTE(review): Tizen presumably provides flatbuffers through the platform - confirm.
+if (NOT ${TARGET_OS} MATCHES "tizen")
+ nnas_find_package(FlatBuffers REQUIRED)
+endif ()
+
+nnfw_find_package(TensorFlowGpu QUIET)
if(NOT TensorFlowGpu_FOUND)
message(FATAL_ERROR 'TensorFlowGpu lib not found')
return()
add_library(${LIB_ONERT_BACKEND_GPU_CL} SHARED ${SOURCES})
target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${TENSORFLOWGPU_SOURCE_DIR})
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${TensorFlowSource_DIR})
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${VulkanSource_DIR}/include)
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${Opengl_HeadersSource_DIR}/api)
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${Egl_HeadersSource_DIR}/api)
+
+if (${TARGET_OS} MATCHES "tizen")
+ target_compile_options(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE "-Wno-error=deprecated-copy")
+endif ()
+
+target_compile_options(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE "-DCL_TARGET_OPENCL_VERSION=220" "-DEGL_NO_X11")
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE abseil)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE dl)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE farmhash)
-target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} INTERFACE Open_CL_Headers)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE OpenCL_Headers)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE fp16)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE TensorFlowGpu)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE onert_core)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${LIB_ONERT_BACKEND_CL_COMMON})
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_coverage)
+if (${TARGET_OS} MATCHES "tizen")
+ target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE flatbuffers)
+else()
+ target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE flatbuffers::flatbuffers)
+endif ()
set_target_properties(${LIB_ONERT_BACKEND_GPU_CL} PROPERTIES OUTPUT_NAME backend_gpu_cl)
COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_GPU_CL}>)
endif()
+add_library(tflite_ignore_warnings INTERFACE)
+target_compile_options(tflite_ignore_warnings INTERFACE -Wno-unused-parameter -Wno-sign-compare)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE tflite_ignore_warnings)
+
install(TARGETS ${LIB_ONERT_BACKEND_GPU_CL} DESTINATION lib)
#include <vector>
#include <memory>
-#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
-#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_operation.h"
namespace onert
{
+// IFunction implementation that drives a list of CL operations.
+//
+// The operations are stored as raw pointers and are never deleted by this class, so whoever
+// registers them through add_operation() must keep them alive while the function is used.
class ClFunction : public ::onert::exec::IFunction
{
public:
- ClFunction() : _gpu_operations(), _creation_context() {}
-
-public:
- void configure(std::shared_ptr<tflite::gpu::cl::CreationContext> creation_context)
+ // Stores the creation context used by run() and prepare(); starts with no operations.
+ ClFunction(std::shared_ptr<tflite::gpu::cl::CreationContext> creation_context)
+ : _creation_context(creation_context), _gpu_operations()
{
- _creation_context = creation_context;
}
- void add_operation(std::unique_ptr<tflite::gpu::cl::GPUOperation> gpu_operation)
+public:
+ // Appends a non-owning pointer to the list of operations handled by this function.
+ void add_operation(tflite::gpu::cl::ClOperation *gpu_operation)
{
- _gpu_operations.push_back(std::move(gpu_operation));
+ _gpu_operations.push_back(gpu_operation);
}
+ // Adds every registered operation to the context's queue, in registration order.
+ // Throws std::runtime_error as soon as one AddToQueue call does not report ok().
void run() override
{
- for (const auto &gpu_operation : _gpu_operations)
+ for (const auto gpu_operation : _gpu_operations)
{
if (!gpu_operation->AddToQueue(_creation_context->queue).ok())
{
throw std::runtime_error("Failed to AddToQueue.");
}
- if (!_creation_context->queue->WaitForCompletion().ok())
- {
- throw std::runtime_error("Failed to WaitForCompletion.");
- }
}
}
+ // For each operation: AssembleCode, Compile, UpdateParams, then release the CPU-side
+ // representation of its arguments. Throws std::runtime_error on the first failing step.
void prepare() override
{
- for (const auto &gpu_operation : _gpu_operations)
+ for (const auto gpu_operation : _gpu_operations)
{
+ if (!gpu_operation->GetGpuOperation().AssembleCode(_creation_context->GetGpuInfo()).ok())
+ {
+ throw std::runtime_error("Failed to AssembleCode.");
+ }
if (!gpu_operation->Compile(*_creation_context).ok())
{
throw std::runtime_error("Failed to Compile.");
}
-
if (!gpu_operation->UpdateParams().ok())
{
throw std::runtime_error("Failed to UpdateParams.");
}
+ // Done only after all three steps above succeeded for this operation.
+ gpu_operation->GetGpuOperation().args_.ReleaseCPURepresentation();
}
}
private:
- std::vector<std::unique_ptr<tflite::gpu::cl::GPUOperation>> _gpu_operations;
std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
+ // Non-owning; declared after _creation_context to match the constructor's initializer order.
+ std::vector<tflite::gpu::cl::ClOperation *> _gpu_operations;
};
} // namespace gpu_cl
bool supportDynamicTensor() override { return false; }
bool supportFP16() override { return true; }
std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
-
-private:
- void *_handle = nullptr;
};
} // namespace gpu_cl
#include "TensorManager.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
-#include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h"
-#include "tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.h"
-#include "tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h"
+#include "tensorflow/lite/delegates/gpu/common/tensor.h"
+#include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/dw_convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/common/selectors/simple_selectors.h"
#include "ir/Operations.h"
#include "ir/Operations.Include.h"
#include "util/logging.h"
#include "util/Utils.h"
-using namespace tflite::gpu;
-using namespace tflite::gpu::cl;
-
namespace onert
{
namespace backend
namespace gpu_cl
{
-HW ToHW(int32_t h, int32_t w) { return HW(h > 0 ? h : 1, w > 0 ? w : 1); }
+// Wraps gpu_op in a CLNode and queues it for later binding.
+//
+// The node remembers the raw value of every input and output operand index, and the
+// operation index currently stored in _operation_index is recorded alongside it so the
+// node can be matched to its operation afterwards.
+void KernelGenerator::addClNode(const std::vector<ir::OperandIndex> &inputs,
+                                const std::vector<ir::OperandIndex> &outputs,
+                                std::unique_ptr<tflite::gpu::GPUOperation> gpu_op)
+{
+  tflite::gpu::cl::CLNode node;
+  node.cl_operation.Init(std::move(gpu_op));
+
+  // Copy the operand ids in their original order.
+  node.inputs.reserve(inputs.size());
+  for (const auto &operand : inputs)
+  {
+    node.inputs.push_back(operand.value());
+  }
+
+  node.outputs.reserve(outputs.size());
+  for (const auto &operand : outputs)
+  {
+    node.outputs.push_back(operand.value());
+  }
+
+  // _nodes and _operation_indexes are parallel containers: entry k of one describes
+  // entry k of the other.
+  _nodes.push_back(std::move(node));
+  _operation_indexes.push_back(_operation_index);
+}
+
+// Binds every queued CL node to the function sequence of the operation that produced it.
+//
+// Functions is walked in order while _nodes / _operation_indexes are consumed front to
+// back; consecutive nodes tagged with the operation index of the current entry are attached
+// to that entry's sequence. For each node, the source and destination tensors are resolved
+// through the tensor registry before the node's ClOperation is handed to every function of
+// the sequence.
+//
+// Safe to call when no node has been registered: nothing is read from _operation_indexes
+// unless an unconsumed node exists.
+void KernelGenerator::get_operation(FunctionMap &Functions)
+{
+  const size_t size = _nodes.size();
+  size_t i = 0;
+  for (auto &&it : Functions)
+  {
+    // All nodes consumed (or none were ever registered): nothing left to bind.
+    if (i == size)
+    {
+      break;
+    }
+
+    auto &fn_seq = it.second;
+    // The bound check comes first so _operation_indexes[i] is never read out of range.
+    while (i < size && it.first == _operation_indexes[i])
+    {
+      auto &node = _nodes[i++];
+      for (size_t j = 0; j < node.inputs.size(); ++j)
+      {
+        uint32_t idx = node.inputs[j];
+        node.cl_operation.GetGpuOperation().SetSrc(
+          _tensor_reg->getClTensor(ir::OperandIndex{idx})->handle(), j);
+      }
+      for (size_t j = 0; j < node.outputs.size(); ++j)
+      {
+        uint32_t idx = node.outputs[j];
+        node.cl_operation.GetGpuOperation().SetDst(
+          _tensor_reg->getClTensor(ir::OperandIndex{idx})->handle(), j);
+      }
+      // The functions keep a raw pointer into _nodes, so _nodes has to stay alive and
+      // unmodified for as long as those functions are in use.
+      fn_seq->iterate([&](exec::IFunction &ifunc) {
+        static_cast<ClFunction &>(ifunc).add_operation(&node.cl_operation);
+      });
+    }
+  }
+}
-template <typename AttrT>
-void UpdatePadding(const ir::PaddingType type, const BHWC &input_shape, AttrT *attr)
+// Reads the constant data of operand `index` into *param.
+//
+// Depending on the operand shape, *param receives:
+//   - a single float, when the shape has rank 0 and exactly one element,
+//   - a Linear tensor, when CheckIfLinearConvertible() accepts the shape,
+//   - an HWC tensor, for rank-3 shapes or rank-4 shapes whose first dimension is 1.
+// Returns OkStatus on success, UnimplementedError for a rank-4 shape with a first dimension
+// other than 1, and InvalidArgumentError for any other rank in the HWC branch.
+//
+// NOTE(review): every branch copies operandSize() bytes into a float buffer sized by the
+// element count - this assumes the operand holds FLOAT32 data; confirm against callers.
+absl::Status KernelGenerator::readConstTensor(const ir::OperandIndex &index,
+ tflite::gpu::TensorOrScalar *param)
{
- if (type == ir::PaddingType::SAME)
+ const auto shape = _ctx.at(index).shape();
+ // Scalar case: one value, stored directly in *param.
+ if (shape.rank() == 0 && shape.num_elements() == 1)
{
- attr->padding = CalculateSamePadding(input_shape, *attr);
+ tflite::gpu::Tensor<tflite::gpu::Scalar, tflite::gpu::DataType::FLOAT32> tensor;
+ tensor.shape.v = 1;
+ tensor.data.resize(1);
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *param = tensor.data[0];
}
else
{
- attr->padding.prepended = HW(0, 0);
- attr->padding.appended = HW(0, 0);
+ if (CheckIfLinearConvertible(&shape))
+ {
+ // Linear case: the tensor length is taken from the last dimension of the shape.
+ tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32> tensor;
+ tensor.shape.v = shape.dim(shape.rank() - 1);
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *param = std::move(tensor);
+ }
+ else
+ {
+ // HWC case: dimensions are mapped positionally (a leading batch of 1 is dropped).
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32> tensor;
+ if (shape.rank() == 3)
+ {
+ tensor.shape.h = shape.dim(0);
+ tensor.shape.w = shape.dim(1);
+ tensor.shape.c = shape.dim(2);
+ }
+ else if (shape.rank() == 4)
+ {
+ if (shape.dim(0) != 1)
+ {
+ return absl::UnimplementedError("Batch size is not equal to 1.");
+ }
+ tensor.shape.h = shape.dim(1);
+ tensor.shape.w = shape.dim(2);
+ tensor.shape.c = shape.dim(3);
+ }
+ else
+ {
+ return absl::InvalidArgumentError(
+ "Expected a 3D tensor of shape HxWxC or a 4D tensor of shape 1xHxWxC.");
+ }
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *param = std::move(tensor);
+ }
}
+ return absl::OkStatus();
}
-PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+// Reads the constant data of operand `index` into *alpha, which can hold either a Linear
+// or an HWC float tensor.
+//
+// A Linear tensor is produced when CheckIfLinearConvertible() accepts the operand shape;
+// otherwise an HWC tensor is built from a rank-3 shape, or from a rank-4 shape whose first
+// dimension is 1. Returns OkStatus on success, UnimplementedError for a rank-4 shape with a
+// first dimension other than 1, and InvalidArgumentError for any other rank.
+//
+// NOTE(review): operandSize() bytes are copied into a float buffer sized by the element
+// count - this assumes the operand holds FLOAT32 data; confirm against callers.
+absl::Status KernelGenerator::readConstTensor(
+ const ir::OperandIndex &index,
+ absl::variant<tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32>,
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32>> *alpha)
{
- switch (type_ir)
+ const auto shape = _ctx.at(index).shape();
+ if (CheckIfLinearConvertible(&shape))
{
- case ir::operation::Pool2D::PoolType::AVG:
- return PoolingType::AVERAGE;
- case ir::operation::Pool2D::PoolType::MAX:
- return PoolingType::MAX;
- default:
- throw std::runtime_error("gpu_Cl KernelGenerator : Not supported operation yet");
+ // Linear case: the tensor length is taken from the last dimension of the shape.
+ tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32> tensor;
+ tensor.shape.v = shape.dim(shape.rank() - 1);
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *alpha = std::move(tensor);
}
+ else
+ {
+ // HWC case: dimensions are mapped positionally (a leading batch of 1 is dropped).
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32> tensor;
+ if (shape.rank() == 3)
+ {
+ tensor.shape.h = shape.dim(0);
+ tensor.shape.w = shape.dim(1);
+ tensor.shape.c = shape.dim(2);
+ }
+ else if (shape.rank() == 4)
+ {
+ if (shape.dim(0) != 1)
+ {
+ return absl::UnimplementedError("Batch size is not equal to 1.");
+ }
+ tensor.shape.h = shape.dim(1);
+ tensor.shape.w = shape.dim(2);
+ tensor.shape.c = shape.dim(3);
+ }
+ else
+ {
+ return absl::InvalidArgumentError(
+ "Expected a 3D tensor of shape HxWxC or a 4D tensor of shape 1xHxWxC.");
+ }
+ tensor.data.resize(shape.num_elements());
+ std::memcpy(&tensor.data[0], _ctx.at(index).data()->base(), _ctx.at(index).operandSize());
+ *alpha = std::move(tensor);
+ }
+ return absl::OkStatus();
}
-KernelGenerator::KernelGenerator(const ir::Graph &graph,
- const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<TensorRegistry> &tensor_reg,
- const std::shared_ptr<CreationContext> &creation_context)
+KernelGenerator::KernelGenerator(
+ const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<TensorRegistry> &tensor_reg,
+ const std::shared_ptr<tflite::gpu::cl::CreationContext> &creation_context)
: basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
_operations_ctx(graph.operations()), _current_layout{graph.layout()},
_tensor_builder(tensor_builder), _tensor_reg(tensor_reg), _creation_context(creation_context)
+// Generates the function sequence for operation `ind`.
+//
+// Creates an empty sequence with dynamic shape inference disabled, lets the operation
+// dispatch to this generator through accept(), and appends the function handed back by
+// releaseFunction() (presumably the one the visit() overload left in _return_fn - verify).
std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
{
- auto ret = std::make_unique<exec::FunctionSequence>();
- ret->enableDynamicShapeInferer(false);
-
+ auto fn_seq = std::make_unique<exec::FunctionSequence>();
+ fn_seq->enableDynamicShapeInferer(false);
+ // Remembered so that addClNode() can tag the nodes created while visiting this operation.
+ _operation_index = ind;
const auto &op = _graph.operations().at(ind);
op.accept(*this);
- ret->append(releaseFunction());
- return ret;
+ fn_seq->append(releaseFunction());
+ return fn_seq;
}
void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
- // const auto activation = node.param().activation;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+
+ const bool lhs_const = _ctx.at(lhs_index).isConstant();
+ const bool rhs_const = _ctx.at(rhs_index).isConstant();
+
+ if (lhs_const && rhs_const)
+ {
+ throw std::runtime_error("No runtime input tensors for " + node.name());
+ }
+
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationType op_type = convertArithmeticType(node.param().arithmetic_type);
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(lhs_index)->descriptor);
- auto lhs_shape = _tensor_reg->getClTensorReserver(lhs_index)->shape;
+ if (!lhs_const && !rhs_const)
+ {
+ auto lhs_shape = _tensor_reg->getClTensor(lhs_index)->get_info()._shape;
+ auto rhs_shape = _tensor_reg->getClTensor(rhs_index)->get_info()._shape;
+
+ bool swap =
+ (op_type == tflite::gpu::OperationType::MUL) &&
+ (lhs_shape.h <= rhs_shape.h && lhs_shape.w <= rhs_shape.w && lhs_shape.c <= rhs_shape.c);
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(rhs_index)->descriptor);
- auto rhs_shape = _tensor_reg->getClTensorReserver(rhs_index)->shape;
+ auto first_index = swap ? rhs_index : lhs_index;
+ auto second_index = swap ? lhs_index : rhs_index;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
- auto out_shape = _tensor_reg->getClTensorReserver(ofm_index)->shape;
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(first_index)->get_info()._desc);
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(second_index)->get_info()._desc);
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
- auto fn = std::make_unique<ClFunction>();
+ auto second_shape = _tensor_reg->getClTensor(second_index)->get_info()._shape;
- std::unique_ptr<GPUOperation> gpu_op;
- switch (node.param().arithmetic_type)
+ tflite::gpu::GPUOperation operation = CreateElementwiseTwoInput(op_def, op_type, second_shape);
+ gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+
+ addClNode({first_index, second_index}, {ofm_index}, std::move(gpu_op));
+ }
+ else
{
- case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
- {
- std::vector<int> channels(2);
- channels[0] = lhs_shape.c;
- channels[1] = rhs_shape.c;
- SelectAdd(op_def, channels, out_shape.c, &gpu_op);
-
- auto ofm_tensor = _tensor_reg->getClTensor(ofm_index);
- auto lhs_tensor = _tensor_reg->getClTensor(lhs_index);
- auto rhs_tensor = _tensor_reg->getClTensor(rhs_index);
- gpu_op->SetSrc(lhs_tensor->handle(), ir::operation::BinaryArithmetic::Input::LHS);
- gpu_op->SetSrc(rhs_tensor->handle(), ir::operation::BinaryArithmetic::Input::RHS);
- gpu_op->SetDst(ofm_tensor->handle(), 0);
-
- fn->configure(_creation_context);
- fn->add_operation(std::move(gpu_op));
- break;
- }
- case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
- {
- // NYI
- break;
- }
- case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
- {
- // NYI
- break;
- }
- case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ auto non_const_index = rhs_const ? lhs_index : rhs_index;
+ auto const_index = rhs_const ? rhs_index : lhs_index;
+
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(non_const_index)->get_info()._desc);
+
+ tflite::gpu::ElementwiseAttributes attr;
+
+ if (!readConstTensor(const_index, &attr.param).ok())
{
- // NYI
- break;
+ throw std::runtime_error("BinaryArithmetic unsupported constant tensor");
}
- default:
- assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
- break;
- }
+ tflite::gpu::GPUOperation operation =
+ CreateElementwise(_creation_context->GetGpuInfo(), op_def, op_type, attr);
+ gpu_op = absl::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+
+ addClNode({non_const_index}, {ofm_index}, std::move(gpu_op));
+ }
_return_fn = std::move(fn);
}
const auto param = node.param();
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input)->descriptor);
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input)->get_info()._desc);
- auto input_shape = _tensor_reg->getClTensorReserver(input)->shape;
- auto kernel_shape = _tensor_reg->getClTensorReserver(kernel)->shape;
- auto output_shape = _tensor_reg->getClTensorReserver(output)->shape;
- auto bias_shape = _tensor_reg->getClTensorReserver(bias)->shape;
+ auto input_shape = _tensor_reg->getClTensor(input)->get_info()._shape;
+ auto kernel_shape = _tensor_reg->getClTensor(kernel)->get_info()._shape;
+ auto output_shape = _tensor_reg->getClTensor(output)->get_info()._shape;
+ auto bias_shape = _tensor_reg->getClTensor(bias)->get_info()._shape;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output)->descriptor);
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
- ModelHints hints;
- std::unique_ptr<GPUOperation> gpu_op; // = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
+ tflite::gpu::ModelHints hints;
+ std::unique_ptr<tflite::gpu::GPUOperation>
+ gpu_op; // = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
- auto input_tensor = _tensor_reg->getClTensor(input);
auto kernel_tensor = _tensor_reg->getClTensor(kernel);
auto bias_tensor = _tensor_reg->getClTensor(bias);
- auto output_tensor = _tensor_reg->getClTensor(output);
- Convolution2DAttributes attr;
+ tflite::gpu::Convolution2DAttributes attr;
attr.strides = ToHW(param.stride.vertical, param.stride.horizontal);
- attr.dilations = HW(std::max(static_cast<u_int32_t>(1), param.dilation.height_factor),
- std::max(static_cast<u_int32_t>(1), param.dilation.width_factor));
+ attr.dilations =
+ tflite::gpu::HW(std::max(static_cast<u_int32_t>(1), param.dilation.height_factor),
+ std::max(static_cast<u_int32_t>(1), param.dilation.width_factor));
bool is_weight = (_ctx.at(kernel).isConstant() ? true : false);
UpdatePadding(param.padding.type, input_shape, &attr);
- gpu_op = SelectConvolution(attr, output_shape, _creation_context->GetDeviceInfo(), op_def, hints);
- gpu_op->SetSrc(input_tensor->handle(), ir::operation::Conv2D::INPUT);
+ gpu_op = SelectConvolution(attr, output_shape, _creation_context->GetGpuInfo(), op_def, hints);
- auto fn = std::make_unique<ClFunction>();
+ tflite::gpu::cl::CLNode cl_node;
+ cl_node.inputs.resize(1);
+ cl_node.inputs[0] = input.value();
+ cl_node.outputs.resize(1);
- fn->configure(_creation_context);
+ auto fn = std::make_unique<ClFunction>(_creation_context);
const auto activation = node.param().activation;
{
case ir::Activation::NONE:
{
- gpu_op->SetDst(output_tensor->handle(), 0);
- fn->add_operation(std::move(gpu_op));
+ addClNode({input}, {output}, std::move(gpu_op));
break;
}
+ case ir::Activation::RELU:
case ir::Activation::RELU6:
{
- std::unique_ptr<GPUOperation> gpu_op_1;
- OperationDef op_def_1;
- std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>();
-
- _new_tensors[output] = new_tensor;
- if (!CreateTensor(*_creation_context->context, output_shape,
- _tensor_reg->getClTensorReserver(output)->descriptor, new_tensor.get())
- .ok())
- {
- throw std::runtime_error("Error CreateTensor.");
- }
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op_1;
+ tflite::gpu::OperationDef op_def_1;
+ const auto shape = _ctx.at(output).shape();
+ auto new_ind = _tensor_reg->addNewClTensor(shape);
+
+ addClNode({input}, {new_ind}, std::move(gpu_op));
- gpu_op->SetDst(new_tensor.get(), 0);
- fn->add_operation(std::move(gpu_op));
- op_def_1.precision = CalculationsPrecision::F32;
- op_def_1.src_tensors.push_back(_tensor_reg->getClTensorReserver(output)->descriptor);
- op_def_1.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output)->descriptor);
+ op_def_1.precision = tflite::gpu::CalculationsPrecision::F32;
+ op_def_1.src_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
+ op_def_1.dst_tensors.push_back(_tensor_reg->getClTensor(output)->get_info()._desc);
- // - ReLU6: clip = 6, alpha = 0
- ReLUAttributes attr_1;
- attr_1.clip = 6;
+ tflite::gpu::ReLUAttributes attr_1;
+ if (activation == ir::Activation::RELU6)
+ {
+ attr_1.clip = 6;
+ }
+ else
+ {
+ attr_1.clip = 0;
+ }
attr_1.alpha = 0;
gpu_op_1 = SelectReLU(attr_1, op_def_1);
- gpu_op_1->SetSrc(new_tensor.get(), 0);
- gpu_op_1->SetDst(output_tensor->handle(), 0);
- fn->add_operation(std::move(gpu_op_1));
+ addClNode({new_ind}, {output}, std::move(gpu_op_1));
break;
}
default:
{
- throw std::runtime_error("gpu_cl KernelGenerator : Not supported operation yet");
+ throw std::runtime_error("gpu_cl KernelGenerator : Not supported Conv2D activiation");
}
}
-
_return_fn = std::move(fn);
}
const auto multiplier = node.param().multiplier;
- auto ofm_tensor = _tensor_reg->getClTensor(ofm_index);
- auto ifm_tensor = _tensor_reg->getClTensor(ifm_index);
- auto ker_tensor = _tensor_reg->getClTensor(ker_index);
- auto bias_tensor = _tensor_reg->getClTensor(bias_index);
-
bool is_weight = (_ctx.at(ker_index).isConstant() ? true : false);
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(ifm_index)->descriptor);
- auto input_shape = _tensor_reg->getClTensorReserver(ifm_index)->shape;
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(ifm_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(ifm_index)->get_info()._shape;
- auto ker_shape = _tensor_reg->getClTensorReserver(ker_index)->shape;
+ auto ker_shape = _tensor_reg->getClTensor(ker_index)->get_info()._shape;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
- auto out_shape = _tensor_reg->getClTensorReserver(ofm_index)->shape;
- auto bias_shape = _tensor_reg->getClTensorReserver(bias_index)->shape;
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+ auto out_shape = _tensor_reg->getClTensor(ofm_index)->get_info()._shape;
+ auto bias_shape = _tensor_reg->getClTensor(bias_index)->get_info()._shape;
- DepthwiseConvolution2DAttributes attr;
+ tflite::gpu::DepthwiseConvolution2DAttributes attr;
attr.strides = ToHW(stride.vertical, stride.horizontal);
- attr.dilations = HW(std::max(static_cast<u_int32_t>(1), dilation.height_factor),
- std::max(static_cast<u_int32_t>(1), dilation.width_factor));
+ attr.dilations = tflite::gpu::HW(std::max(static_cast<u_int32_t>(1), dilation.height_factor),
+ std::max(static_cast<u_int32_t>(1), dilation.width_factor));
if (is_weight)
{
attr.weights.shape.w = ker_shape.w;
attr.weights.shape.i = ker_shape.c;
attr.weights.data.resize(ker_shape.DimensionsProduct());
- memcpy(attr.weights.data.data(), _ctx.at(ker_index).data()->base(), ker_tensor->total_size());
+ memcpy(attr.weights.data.data(), _ctx.at(ker_index).data()->base(),
+ _ctx.at(ker_index).operandSize());
}
attr.bias.id = bias_index.value();
attr.bias.shape.v = bias_shape.b != 1 ? bias_shape.b : bias_shape.c;
attr.bias.data.resize(bias_shape.DimensionsProduct());
- memcpy(attr.bias.data.data(), _ctx.at(bias_index).data()->base(), bias_tensor->total_size());
+ memcpy(attr.bias.data.data(), _ctx.at(bias_index).data()->base(),
+ _ctx.at(bias_index).operandSize());
UpdatePadding(padding.type, input_shape, &attr);
if (multiplier != 1)
const int filter_width = ker_shape.w;
const int output_depth = out_shape.c;
- tflite::gpu::Tensor<OHWI, DataType::FLOAT32> weights;
+ tflite::gpu::Tensor<tflite::gpu::OHWI, tflite::gpu::DataType::FLOAT32> weights;
weights.id = attr.weights.id;
weights.shape = tflite::gpu::OHWI(output_depth, filter_height, filter_width, input_depth);
weights.data.resize(weights.shape.DimensionsProduct());
attr.weights = std::move(weights);
}
- auto fn = std::make_unique<ClFunction>();
- std::unique_ptr<GPUOperation> gpu_op;
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
if (is_weight)
{
- gpu_op = SelectDWConvolution(attr, _creation_context->GetDeviceInfo(), op_def);
+ gpu_op = SelectDWConvolution(attr, _creation_context->GetGpuInfo(), op_def);
}
else
{
throw std::runtime_error(
"No support of depthwise runtime weights with channel multiplier != 1");
}
- gpu_op = SelectDWConvolutionDynamicWeights(attr, _creation_context->GetDeviceInfo(), op_def);
+ gpu_op = SelectDWConvolutionDynamicWeights(attr, _creation_context->GetGpuInfo(), op_def);
}
- gpu_op->SetSrc(ifm_tensor->handle(), ir::operation::DepthwiseConv2D::Input::INPUT);
-
- fn->configure(_creation_context);
-
const auto activation = node.param().activation;
switch (activation)
{
case ir::Activation::NONE:
{
- gpu_op->SetDst(ofm_tensor->handle(), 0);
- fn->add_operation(std::move(gpu_op));
+ addClNode({ifm_index}, {ofm_index}, std::move(gpu_op));
break;
}
+ case ir::Activation::RELU:
case ir::Activation::RELU6:
{
- std::unique_ptr<GPUOperation> gpu_op_1;
- OperationDef op_def_1;
- std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>();
-
- _new_tensors[ofm_index] = new_tensor;
- if (!CreateTensor(*_creation_context->context, out_shape,
- _tensor_reg->getClTensorReserver(ofm_index)->descriptor, new_tensor.get())
- .ok())
- {
- throw std::runtime_error("Error CreateTensor.");
- }
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op_1;
+ tflite::gpu::OperationDef op_def_1;
+ const auto shape = _ctx.at(ofm_index).shape();
+ auto new_ind = _tensor_reg->addNewClTensor(shape);
+
+ addClNode({ifm_index}, {new_ind}, std::move(gpu_op));
- gpu_op->SetDst(new_tensor.get(), 0);
- fn->add_operation(std::move(gpu_op));
- op_def_1.precision = CalculationsPrecision::F32;
- op_def_1.src_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
- op_def_1.dst_tensors.push_back(_tensor_reg->getClTensorReserver(ofm_index)->descriptor);
+ op_def_1.precision = tflite::gpu::CalculationsPrecision::F32;
- // - ReLU6: clip = 6, alpha = 0
- ReLUAttributes attr_1;
- attr_1.clip = 6;
+ op_def_1.src_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+ op_def_1.dst_tensors.push_back(_tensor_reg->getClTensor(ofm_index)->get_info()._desc);
+
+ tflite::gpu::ReLUAttributes attr_1;
+ if (activation == ir::Activation::RELU6)
+ {
+ attr_1.clip = 6;
+ }
+ else
+ {
+ attr_1.clip = 0;
+ }
attr_1.alpha = 0;
gpu_op_1 = SelectReLU(attr_1, op_def_1);
- gpu_op_1->SetSrc(new_tensor.get(), 0);
- gpu_op_1->SetDst(ofm_tensor->handle(), 0);
- fn->add_operation(std::move(gpu_op_1));
+ addClNode({new_ind}, {ofm_index}, std::move(gpu_op_1));
break;
}
default:
{
- throw std::runtime_error("gpu_cl KernelGenerator : Not supported operation yet");
+ throw std::runtime_error("gpu_cl KernelGenerator : Not supported DepthwiseConv2D acvivation");
}
}
void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
- std::unique_ptr<GPUOperation> gpu_op;
- auto fn = std::make_unique<ClFunction>();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
+ auto fn = std::make_unique<ClFunction>(_creation_context);
switch (node.param().op_type)
{
case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
case ir::operation::ElementwiseActivation::Type::RELU:
{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{
- node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
-
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
- auto output_tensor = _tensor_reg->getClTensor(output_index);
- auto input_tensor = _tensor_reg->getClTensor(input_index);
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
-
- ReLUAttributes attr;
+ tflite::gpu::ReLUAttributes attr;
if (ir::operation::ElementwiseActivation::Type::LEAKY_RELU == node.param().op_type)
{
attr.alpha = node.param().alpha;
attr.clip = node.param().alpha;
}
gpu_op = SelectReLU(attr, op_def);
- gpu_op->SetSrc(input_tensor->handle(), ir::operation::ElementwiseActivation::Input::INPUT);
- gpu_op->SetDst(output_tensor->handle(), 0);
- fn->configure(_creation_context);
- fn->add_operation(std::move(gpu_op));
-
- _return_fn = std::move(fn);
+ break;
+ }
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ {
+ if (_ctx.at(input_index).typeInfo().type() != ir::DataType::FLOAT32)
+ {
+ throw std::runtime_error{"Unsupported data type of LOGISTIC"};
+ }
+ tflite::gpu::GPUOperation operation =
+ CreateElementwiseOneInput(_creation_context->GetGpuInfo(), op_def,
+ convertElementwiseActivationType(node.param().op_type));
+ gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
+ break;
+ }
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ {
+ tflite::gpu::GPUOperation operation = CreateElementwiseOneInput(
+ _creation_context->GetGpuInfo(), op_def, tflite::gpu::OperationType::TANH);
+ gpu_op = std::make_unique<tflite::gpu::GPUOperation>(std::move(operation));
break;
}
default:
- throw std::runtime_error("gpu_cl KernelGenerator : Not supported operation yet");
+ throw std::runtime_error(
+ "gpu_cl KernelGenerator : Not supported operation on ElementwiseActivation");
}
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
+ _return_fn = std::move(fn);
}
void KernelGenerator::visit(const ir::operation::Pool2D &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
- auto input_shape = _tensor_reg->getClTensorReserver(input_index)->shape;
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
const auto kh = node.param().kh;
const auto kw = node.param().kw;
const auto stride = node.param().stride;
const auto op_type = convertPoolType(node.param().op_type);
- Pooling2DAttributes attributes;
+ tflite::gpu::Pooling2DAttributes attributes;
attributes.type = op_type;
- attributes.kernel = HW(kh > 0 ? kh : 1, kw > 0 ? kw : 1);
- attributes.strides =
- HW(stride.vertical > 0 ? stride.vertical : 1, stride.horizontal > 0 ? stride.horizontal : 1);
+ attributes.kernel = tflite::gpu::HW(kh > 0 ? kh : 1, kw > 0 ? kw : 1);
+ attributes.strides = tflite::gpu::HW(stride.vertical > 0 ? stride.vertical : 1,
+ stride.horizontal > 0 ? stride.horizontal : 1);
if (node.param().padding.type == ir::PaddingType::SAME)
{
}
else
{
- attributes.padding.prepended = HW(0, 0);
- attributes.padding.appended = HW(0, 0);
+ attributes.padding.prepended = tflite::gpu::HW(0, 0);
+ attributes.padding.appended = tflite::gpu::HW(0, 0);
}
- auto fn = std::make_unique<ClFunction>();
- std::unique_ptr<GPUOperation> gpu_op;
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
gpu_op = SelectPooling(attributes, op_def);
- auto input_tensor = _tensor_reg->getClTensor(input_index);
- auto output_tensor = _tensor_reg->getClTensor(output_index);
-
- gpu_op->SetSrc(input_tensor->handle(), ir::operation::Pool2D::Input::INPUT);
- gpu_op->SetDst(output_tensor->handle(), 0);
-
- fn->configure(_creation_context);
- fn->add_operation(std::move(gpu_op));
-
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
- auto input_shape = _tensor_reg->getClTensorReserver(input_index)->shape;
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
- auto output_shape = _tensor_reg->getClTensorReserver(output_index)->shape;
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
+ auto output_shape = _tensor_reg->getClTensor(output_index)->get_info()._shape;
- ReshapeAttributes attr;
+ tflite::gpu::ReshapeAttributes attr;
attr.new_shape = output_shape;
- auto fn = std::make_unique<ClFunction>();
- std::unique_ptr<GPUOperation> gpu_op;
+ auto fn = std::make_unique<ClFunction>(_creation_context);
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
const int src_channels = input_shape.c;
SelectReshape(src_channels, attr.new_shape.c, op_def, &gpu_op);
- auto input_tensor = _tensor_reg->getClTensor(input_index);
- auto output_tensor = _tensor_reg->getClTensor(output_index);
- gpu_op->SetSrc(input_tensor->handle(), ir::operation::Reshape::Input::INPUT);
- gpu_op->SetDst(output_tensor->handle(), 0);
-
- fn->configure(_creation_context);
- fn->add_operation(std::move(gpu_op));
-
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
_return_fn = std::move(fn);
}
throw std::runtime_error("Softmax.beta != 1 is not supported in gpu_cl");
}
- OperationDef op_def;
- op_def.precision = CalculationsPrecision::F32;
+ tflite::gpu::OperationDef op_def;
+ op_def.precision = tflite::gpu::CalculationsPrecision::F32;
- op_def.dst_tensors.push_back(_tensor_reg->getClTensorReserver(output_index)->descriptor);
+ op_def.dst_tensors.push_back(_tensor_reg->getClTensor(output_index)->get_info()._desc);
- op_def.src_tensors.push_back(_tensor_reg->getClTensorReserver(input_index)->descriptor);
- auto input_shape = _tensor_reg->getClTensorReserver(input_index)->shape;
+ op_def.src_tensors.push_back(_tensor_reg->getClTensor(input_index)->get_info()._desc);
+ auto input_shape = _tensor_reg->getClTensor(input_index)->get_info()._shape;
- auto fn = std::make_unique<ClFunction>();
+ auto fn = std::make_unique<ClFunction>(_creation_context);
- std::unique_ptr<GPUOperation> gpu_op;
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op;
SelectSoftmax(input_shape, op_def, &gpu_op);
- auto output_tensor = _tensor_reg->getClTensor(output_index);
- auto input_tensor = _tensor_reg->getClTensor(input_index);
-
- gpu_op->SetSrc(input_tensor->handle(), ir::operation::Softmax::Input::INPUT);
- gpu_op->SetDst(output_tensor->handle(), 0);
-
- fn->configure(_creation_context);
- fn->add_operation(std::move(gpu_op));
+ addClNode({input_index}, {output_index}, std::move(gpu_op));
_return_fn = std::move(fn);
}
#include <backend/CustomKernelBuilder.h>
#include <backend/basic/KernelGeneratorBase.h>
+#include <backend/BackendContext.h>
#include <ir/Operands.h>
#include <ir/Operations.h>
#include <ir/Operations.Include.h>
std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
+ void get_operation(FunctionMap &Functions);
+
private:
void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::Softmax &) override;
+ absl::Status readConstTensor(const ir::OperandIndex &index, tflite::gpu::TensorOrScalar *param);
+ absl::Status readConstTensor(
+ const ir::OperandIndex &index,
+ absl::variant<tflite::gpu::Tensor<tflite::gpu::Linear, tflite::gpu::DataType::FLOAT32>,
+ tflite::gpu::Tensor<tflite::gpu::HWC, tflite::gpu::DataType::FLOAT32>> *alpha);
+ void addClNode(const std::vector<ir::OperandIndex> &inputs,
+ const std::vector<ir::OperandIndex> &outputs,
+ std::unique_ptr<tflite::gpu::GPUOperation> gpu_op);
private:
const ir::Operands &_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<TensorRegistry> _tensor_reg;
std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
- ir::OperandIndexMap<std::shared_ptr<tflite::gpu::cl::Tensor>> _new_tensors;
+ std::vector<tflite::gpu::cl::CLNode> _nodes;
+ ir::OperationIndex _operation_index;
+ std::vector<ir::OperationIndex> _operation_indexes;
};
} // namespace gpu_cl
#ifndef __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
#define __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
-#include "ex/InferenceContextEx.h"
#include "operand/CLTensor.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandInfo.h"
#include "util/logging.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
-#include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
#include <cassert>
class MemoryManager
{
public:
- MemoryManager(tflite::gpu::cl::CLContext *context) : _context{context} {}
+ // Store the CL context pointer, the GPU model creation info and the shared environment.
+ // These are read later by allocate(), buildTensor() and addTensor().
+ MemoryManager(tflite::gpu::cl::CLContext *context, tflite::gpu::CreateGpuModelInfo create_info,
+ const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
+ : _context{context}, _create_info{create_info}, _environment{environment}
+ {
+ }
~MemoryManager() = default;
void allocate(void)
{
+ std::unique_ptr<tflite::gpu::TensorObjectConverterBuilder> converter_builder =
+ NewConverterBuilder(_environment.get());
for (const auto &tensor_entry : _tensors)
{
auto tensor = tensor_entry.second;
auto type = tensor->get_type();
- // if (type == TensorType::TENSOR_TYPE_DELETE) {
- // continue;
- // }
+ if (type == TensorType::TENSOR_TYPE_DELETE)
+ {
+ continue;
+ }
+
+ const auto &shape = tensor->get_info()._shape;
+ const auto &descriptor = tensor->get_info()._desc;
- const auto &t = tensor_reserver_.Get(tensor_entry.first.value());
- const auto &shape = t->shape;
- const auto &descriptor = t->descriptor;
if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok())
{
std::runtime_error("Failed to CreateTensor");
switch (type)
{
case TensorType::TENSOR_TYPE_INPUT:
- tensor->writeConvertInit();
+ tensor->writeConvertInit(converter_builder.get(), _environment);
break;
case TensorType::TENSOR_TYPE_OUTPUT:
- tensor->readConvertInit();
+ tensor->readConvertInit(converter_builder.get(), _environment);
break;
default:
break;
{ /* DO NOTHING */
}
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
- std::shared_ptr<tflite::gpu::cl::Environment> environment,
- tflite::gpu::cl::DeviceInfo &device_info, TensorType type)
+ // Create a CLTensor for operand `ind` and register it in _tensors.
+ // The BHWC shape comes from info.shape(); the data type and the preferred storage type come
+ // from the stored _create_info. Throws std::runtime_error if SelectBestStorageType fails.
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, TensorType type)
{
- tflite::gpu::ValueId max_id = 0;
- auto data_type = DeduceDataTypeFromPrecision(create_info.precision);
- const auto shape = info.shape();
+ auto data_type = DeduceDataTypeFromPrecision(_create_info.precision);
- auto tensor = std::make_shared<operand::CLTensor>(shape.rank(), shape, environment, type);
- _tensors[ind] = tensor;
- tflite::gpu::BHWC t_shape;
- switch (shape.rank())
+ tflite::gpu::BHWC BHWC_shape = ToBHWC(info.shape());
+
+ tflite::gpu::TensorStorageType storage_type = _create_info.storage_type;
+ // A batch of exactly 1 selects the HWC layout; any other batch value selects BHWC
+ tflite::gpu::Layout layout =
+ BHWC_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
+
+ // storage_type is passed both as the requested value and as the output location
+ if (!SelectBestStorageType(_environment->device().GetInfo(), BHWC_shape, storage_type,
+ data_type, layout, &storage_type)
+ .ok())
{
- case 1:
- // B layout
- t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, 1);
- break;
- case 2:
- // BC layout
- t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, shape.dim(1));
- break;
- case 3:
- // BWC layout
- t_shape = tflite::gpu::BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
- break;
- case 4:
- // BHWC layout
- t_shape = tflite::gpu::BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
- break;
- default:
- break;
+ throw std::runtime_error("Failed to SelectBestStorageType");
}
+ // The original IR rank is kept alongside the BHWC shape
+ auto tensor = std::make_shared<operand::CLTensor>(
+ info.shape().rank(), type, BHWC_shape,
+ tflite::gpu::TensorDescriptor{data_type, storage_type, layout});
+ _tensors[ind] = tensor;
+ }
- tflite::gpu::cl::TensorStorageType storage_type = create_info.storage_type;
- tflite::gpu::Layout layout =
- t_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
+ // Create a new CLTensor of type TENSOR_TYPE_VALID for `shape`, register it in _tensors under
+ // a newly generated operand index, and return that index.
+ // Throws std::runtime_error if SelectBestStorageType fails.
+ ir::OperandIndex addTensor(const ir::Shape &shape)
+ {
+ auto data_type = DeduceDataTypeFromPrecision(_create_info.precision);
- tflite::gpu::ValueId id = ind.value();
- storage_type =
- tflite::gpu::cl::SelectBestStorageType(device_info, t_shape, storage_type, data_type, layout);
- auto dummy = std::make_shared<InferenceContextEx::DummyTensor>();
- dummy->shape = t_shape;
- dummy->descriptor = tflite::gpu::cl::TensorDescriptor{data_type, storage_type, layout};
- tensor_reserver_.Add(id, dummy);
+ tflite::gpu::BHWC BHWC_shape = ToBHWC(shape);
- max_id = std::max(max_id, id);
+ tflite::gpu::TensorStorageType storage_type = _create_info.storage_type;
+ // A batch of exactly 1 selects the HWC layout; any other batch value selects BHWC
+ tflite::gpu::Layout layout =
+ BHWC_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
- tensor_reserver_.SetNext(max_id + 1);
+ // storage_type is passed both as the requested value and as the output location
+ if (!SelectBestStorageType(_environment->device().GetInfo(), BHWC_shape, storage_type,
+ data_type, layout, &storage_type)
+ .ok())
+ {
+ throw std::runtime_error("Failed to SelectBestStorageType");
+ }
+ // Indices are handed out downward: the current _new_id is used, then decremented.
+ // NOTE(review): _new_id starts at UINT32_MAX - confirm ir::OperandIndex does not reserve
+ // that value as an undefined/invalid index.
+ auto ind = ir::OperandIndex(_new_id--);
+ auto tensor = std::make_shared<operand::CLTensor>(
+ shape.rank(), TensorType::TENSOR_TYPE_VALID, BHWC_shape,
+ tflite::gpu::TensorDescriptor{data_type, storage_type, layout});
+ _tensors[ind] = tensor;
+ return ind;
}
ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &tensors(void) { return _tensors; }
- InferenceContextEx::TensorReserverEx &tensorReservers(void) { return tensor_reserver_; }
-
private:
ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> _tensors;
- InferenceContextEx::TensorReserverEx tensor_reserver_;
tflite::gpu::cl::CLContext *_context;
+ tflite::gpu::CreateGpuModelInfo _create_info;
+ std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+ uint32_t _new_id = UINT32_MAX;
};
} // namespace gpu_cl
#include "TensorManager.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
using UsesType = cl_common::UsesType;
-TensorBuilder::TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
- const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
- : _operands{operands}, _tensor_mgr{tensor_mgr}, _create_info{create_info}, _environment{
- environment}
+TensorBuilder::TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}
{
assert(_tensor_mgr);
}
{
auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
- for (auto &entry : lifetime_map)
+ for (const auto &entry : lifetime_map)
{
- auto &use = entry.second;
+ const auto &use = entry.second;
auto use_type = use.first;
auto use_index = use.second;
assert(use_index.valid());
assert(_tensor_mgr->constTensors().size() == 0);
assert(_tensor_mgr->nonconstTensors().size() == 0);
// Normal tensors
- for (auto &entry : _tensor_info_map)
+ for (const auto &entry : _tensor_info_map)
{
- auto ind = entry.first;
+ const auto &ind = entry.first;
if (_parent_map.count(ind) > 0)
continue;
auto type = _tensor_type_map.at(ind);
const auto &info = entry.second;
- _tensor_mgr->buildTensor(ind, info, _create_info, _environment, _environment->device().info_,
- type);
+ _tensor_mgr->buildTensor(ind, info, type);
}
}
+/**
+ * @brief Ask the tensor manager to create an additional tensor for the given shape
+ * @param shape IR shape of the tensor to add
+ * @return Operand index returned by the tensor manager for the new tensor
+ */
+ir::OperandIndex TensorBuilder::addTensor(const ir::Shape &shape)
+{
+  auto added_index = _tensor_mgr->addTensor(shape);
+  return added_index;
+}
+
} // namespace gpu_cl
} // namespace backend
} // namespace onert
class TensorBuilder
{
public:
- TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
- const std::shared_ptr<tflite::gpu::cl::Environment> &environment);
+ TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr);
/**
* @brief Register tensor information to allocate on ACL-CL backend
private:
void buildTensors(void);
ir::OperandIndex findRootParent(ir::OperandIndex index);
+ ir::OperandIndex addTensor(const ir::Shape &shape);
private:
const ir::Operands &_operands;
ir::OperandIndexMap<size_t> _uses_count_map;
std::unique_ptr<TensorManager> _tensor_mgr;
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo _create_info;
- std::shared_ptr<tflite::gpu::cl::Environment> _environment;
// for linear executor
cl_common::LifetimeSeq _lifetime_seq;
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
-#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
-
-#include "absl/status/status.h"
-#include "tensorflow/lite/delegates/gpu/common/shape.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum TensorType
-{
- TENSOR_TYPE_VALID = 0,
- TENSOR_TYPE_INPUT = 1,
- TENSOR_TYPE_OUTPUT = 2,
- TENSOR_TYPE_DELETE = 3
-};
-
-absl::Status ExtractAxisFromIndex(int dims, int index, tflite::gpu::Axis *axis);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
- std::shared_ptr<tflite::gpu::cl::Environment> environment,
- tflite::gpu::cl::DeviceInfo &device_info, TensorType type)
+ TensorType type)
{
assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
if (info.isConstant())
{
- _const_mgr->buildTensor(ind, info, create_info, environment, device_info, type);
+ _const_mgr->buildTensor(ind, info, type);
_ind_to_mgr.insert({ind, *_const_mgr});
}
else
{
- _nonconst_mgr->buildTensor(ind, info, create_info, environment, device_info, type);
+ _nonconst_mgr->buildTensor(ind, info, type);
_ind_to_mgr.insert({ind, *_nonconst_mgr});
}
}
+/**
+ * @brief Add a tensor of the given shape to the non-constant memory manager
+ *
+ * The returned index is also recorded in _ind_to_mgr so that the index maps to
+ * the non-constant manager.
+ *
+ * @param shape IR shape of the tensor to add
+ * @return Operand index assigned to the new tensor
+ */
+ir::OperandIndex TensorManager::addTensor(const ir::Shape &shape)
+{
+  MemoryManager &nonconst_mgr = *_nonconst_mgr;
+  auto new_index = nonconst_mgr.addTensor(shape);
+
+  // Remember which manager holds the new tensor
+  _ind_to_mgr.insert({new_index, nonconst_mgr});
+  return new_index;
+}
void TensorManager::startLifetime(const ir::OperandIndex &ind)
{
return _nonconst_mgr->tensors();
}
-std::shared_ptr<InferenceContextEx::DummyTensor> TensorManager::atR(const ir::OperandIndex &ind)
-{
- if (_nonconst_mgr->tensorReservers().HaveTensor(ind.value()))
- {
- return _nonconst_mgr->tensorReservers().Get(ind.value());
- }
- else if (_const_mgr->tensorReservers().HaveTensor(ind.value()))
- {
- return _const_mgr->tensorReservers().Get(ind.value());
- }
- return nullptr;
-}
-
-InferenceContextEx::TensorReserverEx &TensorManager::constTensorReservers(void)
-{
- return _const_mgr->tensorReservers();
-}
-
-InferenceContextEx::TensorReserverEx &TensorManager::nonconstTensorReservers(void)
-{
- return _nonconst_mgr->tensorReservers();
-}
-
void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
{
for (auto it : _nonconst_mgr->tensors())
#include "MemoryManager.h"
+#include "Utils.h"
+
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "ir/OperandInfo.h"
#include "ir/OperandIndexMap.h"
void deallocateConsts(void);
void deallocateNonconsts(void);
- void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
- tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
- std::shared_ptr<tflite::gpu::cl::Environment> environment,
- tflite::gpu::cl::DeviceInfo &device_info, TensorType type);
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, TensorType type);
+ ir::OperandIndex addTensor(const ir::Shape &shape);
std::shared_ptr<operand::ICLTensor> findTensorAsParent(const ir::OperandIndex &ind);
void finishLifetime(const ir::OperandIndex &ind);
std::shared_ptr<operand::ICLTensor> at(const ir::OperandIndex &ind);
- std::shared_ptr<InferenceContextEx::DummyTensor> atR(const ir::OperandIndex &ind);
-
- InferenceContextEx::TensorReserverEx &constTensorReservers(void);
- InferenceContextEx::TensorReserverEx &nonconstTensorReservers(void);
ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &constTensors(void);
ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &nonconstTensors(void);
ir::OperandIndexMap<MemoryManager &> _ind_to_mgr;
};
-inline TensorManager *createTensorManager(tflite::gpu::cl::CLContext *context)
+// Build a TensorManager from two MemoryManager instances that share the same CL context,
+// creation info and environment.
+// NOTE(review): everything here is allocated with raw `new`; presumably the caller (and the
+// TensorManager, for the two MemoryManagers) takes ownership - confirm.
+inline TensorManager *
+createTensorManager(tflite::gpu::cl::CLContext *context,
+ tflite::gpu::CreateGpuModelInfo create_info,
+ const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
{
VERBOSE(createTensorManager) << "GPU-CL TensorManager" << std::endl;
- return new TensorManager(new MemoryManager(context), new MemoryManager(context));
+ return new TensorManager(new MemoryManager(context, create_info, environment),
+ new MemoryManager(context, create_info, environment));
}
} // namespace gpu_cl
auto getClTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind).get(); }
- auto getClTensorReserver(const ir::OperandIndex &ind) { return _tensor_mgr->atR(ind); }
+ ir::OperandIndex addNewClTensor(const ir::Shape &shape) { return _tensor_mgr->addTensor(shape); }
private:
TensorManager *_tensor_mgr;
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+
+#include "absl/status/status.h"
+
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+
+#include "ir/operation/BinaryArithmetic.h"
+#include "ir/operation/ElementwiseActivation.h"
+#include "ir/operation/ElementwiseBinary.h"
+#include "ir/operation/ElementwiseUnary.h"
+#include "ir/operation/Pool2D.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+/**
+ * @brief Build a tflite::gpu::HW from a height/width pair, replacing any
+ *        non-positive component with 1
+ * @param h Requested height
+ * @param w Requested width
+ * @return HW whose components are h and w, each substituted by 1 when not positive
+ */
+inline tflite::gpu::HW ToHW(int32_t h, int32_t w)
+{
+  const int32_t height = (h <= 0) ? 1 : h;
+  const int32_t width = (w <= 0) ? 1 : w;
+  return tflite::gpu::HW(height, width);
+}
+
+/**
+ * @brief Set attr->padding according to the IR padding type
+ *
+ * For PaddingType::SAME the padding is computed by CalculateSamePadding from the
+ * input shape and the attribute itself; for every other type both the prepended
+ * and appended padding are set to HW(0, 0).
+ *
+ * @param type        IR padding type of the operation
+ * @param input_shape BHWC shape of the operation input
+ * @param attr        Attribute object whose padding member is overwritten
+ */
+template <typename AttrT>
+inline void UpdatePadding(const ir::PaddingType type, const tflite::gpu::BHWC &input_shape,
+                          AttrT *attr)
+{
+  if (type != ir::PaddingType::SAME)
+  {
+    // No padding on either side
+    attr->padding.prepended = tflite::gpu::HW(0, 0);
+    attr->padding.appended = tflite::gpu::HW(0, 0);
+    return;
+  }
+
+  attr->padding = CalculateSamePadding(input_shape, *attr);
+}
+
+/**
+ * @brief Map an IR Pool2D pool type onto the matching tflite::gpu::PoolingType
+ * @param type_ir IR pool type (AVG or MAX)
+ * @return PoolingType::AVERAGE for AVG, PoolingType::MAX for MAX
+ * @throw std::runtime_error for any other pool type
+ */
+inline tflite::gpu::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+  using PoolType = ir::operation::Pool2D::PoolType;
+
+  if (type_ir == PoolType::AVG)
+  {
+    return tflite::gpu::PoolingType::AVERAGE;
+  }
+  if (type_ir == PoolType::MAX)
+  {
+    return tflite::gpu::PoolingType::MAX;
+  }
+  throw std::runtime_error("gpu_Cl KernelGenerator : Not supported operation yet");
+}
+
+/**
+ * @brief Convert an IR shape of rank 1 to 4 into a tflite::gpu::BHWC shape
+ *
+ * The first IR dimension is always used as the batch. Axes that the IR shape
+ * does not provide are set to 1.
+ *
+ * @param shape IR shape to convert (taken by const reference, so no copy is made)
+ * @return The corresponding BHWC shape, or a default-constructed BHWC when the
+ *         rank is not in the range 1..4
+ */
+inline tflite::gpu::BHWC ToBHWC(const ir::Shape &shape)
+{
+  switch (shape.rank())
+  {
+    case 1:
+      // B layout
+      return tflite::gpu::BHWC(shape.dim(0), 1, 1, 1);
+    case 2:
+      // BC layout
+      return tflite::gpu::BHWC(shape.dim(0), 1, 1, shape.dim(1));
+    case 3:
+      // BWC layout
+      return tflite::gpu::BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
+    case 4:
+      // BHWC layout
+      return tflite::gpu::BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
+    default:
+      // Unsupported rank: keep the previous behaviour of returning a default shape
+      return tflite::gpu::BHWC();
+  }
+}
+
+/**
+ * @brief Check whether a shape can be treated as a linear (one-dimensional) tensor
+ *
+ * The shape qualifies when num_elements() is positive and every dimension
+ * except the last one equals 1.
+ *
+ * @param shape Shape to inspect; a null pointer is reported as not convertible
+ * @return true when the shape is linear-convertible, false otherwise
+ */
+inline bool CheckIfLinearConvertible(const ir::Shape *shape)
+{
+  // Reject a missing shape before dereferencing it
+  if (shape == nullptr || shape->num_elements() <= 0)
+  {
+    return false;
+  }
+  // Only the last dimension may differ from 1
+  for (int i = 0; i < shape->rank() - 1; ++i)
+  {
+    if (shape->dim(i) != 1)
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+/**
+ * @brief Map an IR BinaryArithmetic type onto the matching tflite::gpu::OperationType
+ * @param arithmetic_type_ir IR arithmetic type (ADD, SUB, MUL or DIV)
+ * @return The OperationType with the same name
+ * @throw std::runtime_error for any other arithmetic type
+ */
+inline tflite::gpu::OperationType
+convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
+{
+  using IrType = ir::operation::BinaryArithmetic::ArithmeticType;
+  using GpuType = tflite::gpu::OperationType;
+
+  if (arithmetic_type_ir == IrType::ADD)
+  {
+    return GpuType::ADD;
+  }
+  if (arithmetic_type_ir == IrType::SUB)
+  {
+    return GpuType::SUB;
+  }
+  if (arithmetic_type_ir == IrType::MUL)
+  {
+    return GpuType::MUL;
+  }
+  if (arithmetic_type_ir == IrType::DIV)
+  {
+    return GpuType::DIV;
+  }
+  throw std::runtime_error("Unsupported ArithmeticType");
+}
+
+/**
+ * @brief Map an IR ElementwiseActivation type onto a tflite::gpu::OperationType
+ * @param type_ir IR activation type; only LOGISTIC is handled here
+ * @return OperationType::SIGMOID for LOGISTIC
+ * @throw std::runtime_error for any other activation type
+ */
+inline tflite::gpu::OperationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+  if (type_ir == ir::operation::ElementwiseActivation::Type::LOGISTIC)
+  {
+    return tflite::gpu::OperationType::SIGMOID;
+  }
+  throw std::runtime_error("Unsupported ElementwiseActivationType");
+}
+
+// Role of a tensor handled by the gpu_cl memory manager
+enum TensorType
+{
+ // Type given to tensors created through MemoryManager::addTensor()
+ TENSOR_TYPE_VALID = 0,
+ // MemoryManager::allocate() calls writeConvertInit() on tensors of this type
+ TENSOR_TYPE_INPUT = 1,
+ // MemoryManager::allocate() calls readConvertInit() on tensors of this type
+ TENSOR_TYPE_OUTPUT = 2,
+ // MemoryManager::allocate() skips tensors of this type without creating them
+ TENSOR_TYPE_DELETE = 3
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
-#define __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
-
-#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
-#include "tensorflow/lite/delegates/gpu/common/model.h"
-#include "absl/strings/str_cat.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class InferenceContextEx : public tflite::gpu::cl::InferenceContext
-{
-public:
- struct DummyTensor
- {
- tflite::gpu::BHWC shape;
- tflite::gpu::cl::TensorDescriptor descriptor;
-
- bool operator==(const DummyTensor &b) const
- {
- return shape == b.shape && descriptor == b.descriptor;
- }
- };
-
- class TensorReserverEx
- {
- public:
- tflite::gpu::ValueId Add(const std::shared_ptr<DummyTensor> &dummy)
- {
- reservations_[next_] = dummy;
- return next_++;
- }
- void Add(tflite::gpu::ValueId id, const std::shared_ptr<DummyTensor> &dummy)
- {
- reservations_[id] = dummy;
- }
- void SetNext(tflite::gpu::ValueId id) { next_ = id; }
- bool HaveTensor(tflite::gpu::ValueId id)
- {
- return reservations_.find(id) != reservations_.end();
- }
- std::shared_ptr<DummyTensor> Get(tflite::gpu::ValueId id) { return reservations_[id]; }
-
- std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>>
- GetTensorDescs() const
- {
- std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>> result;
- for (auto &v : reservations_)
- {
- tflite::gpu::cl::TensorDescriptor desc = v.second->descriptor;
- desc.shape.b = v.second->shape.b;
- desc.shape.h = v.second->shape.h;
- desc.shape.w = v.second->shape.w;
- desc.shape.d = 1;
- desc.shape.c = v.second->shape.c;
- result.push_back({v.first, desc});
- }
- return result;
- }
-
- void Add(const std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>>
- &tensors)
- {
- for (auto &v : tensors)
- {
- auto dummy = std::make_shared<DummyTensor>();
- dummy->descriptor = v.second;
- dummy->shape.b = v.second.shape.b;
- dummy->shape.h = v.second.shape.h;
- dummy->shape.w = v.second.shape.w;
- dummy->shape.c = v.second.shape.c;
- Add(v.first, dummy);
- }
- }
-
- private:
- // absl::flat_hash_map<ValueId, DummyTensor> reservations_;
- std::unordered_map<tflite::gpu::ValueId, std::shared_ptr<DummyTensor>> reservations_;
- tflite::gpu::ValueId next_ = 0;
- };
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
-#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
using namespace tflite::gpu::cl;
namespace operand
{
-CLTensor::CLTensor(size_t rank, ir::Shape shape,
- std::shared_ptr<tflite::gpu::cl::Environment> environment, TensorType type)
- : ICLTensor{rank, shape, environment, type}, _tensor(std::make_shared<Tensor>())
+CLTensor::CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+ tflite::gpu::TensorDescriptor desc)
+ : ICLTensor{rank, type, shape, desc}, _tensor(std::make_shared<Tensor>())
{
}
CLTensor() = delete;
public:
- CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment,
- TensorType type);
+ CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+ tflite::gpu::TensorDescriptor desc);
public:
const tflite::gpu::cl::Tensor *handle() const override;
fn(*this);
}
-void ICLTensor::writeConvertInit()
+void ICLTensor::writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment)
{
+ _environment = environment;
TensorObjectDef input_def;
input_def.dimensions.b = handle()->Batch();
input_def.dimensions.h = handle()->Height();
output_def.object_def.data_type = handle()->GetDataType();
input_def.object_def.user_provided = false;
- _converter_builder = NewConverterBuilder(_environment.get());
- if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok())
+ if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok())
{
throw std::runtime_error("Failed to make converter_to");
}
- if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok())
+ if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok())
{
throw std::runtime_error("Failed to make converter_from");
}
}
-void ICLTensor::readConvertInit()
+void ICLTensor::readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment)
{
- _converter_builder = NewConverterBuilder(_environment.get());
-
+ _environment = environment;
TensorObjectDef input_def;
input_def.dimensions.b = handle()->Batch();
input_def.dimensions.h = handle()->Height();
TensorObjectDef output_def = permute_def;
output_def.object_def.object_type = ObjectType::CPU_MEMORY;
- if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok())
+ if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok())
{
throw std::runtime_error("Failed to make converter_from");
}
- if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok())
+ if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok())
{
throw std::runtime_error("Failed to make converter_to");
}
}
-void ICLTensor::enqueueWriteBuffer(const void *ptr, bool)
+void ICLTensor::enqueueWriteBuffer(const void *ptr, bool blocking)
{
- TensorObject input_obj =
- MakeReadableCpuMemory(absl::MakeSpan(static_cast<const float *>(ptr), _shape.num_elements()));
+ TensorObject input_obj = MakeReadableCpuMemory(
+ absl::MakeSpan(static_cast<const float *>(ptr), _info._shape.DimensionsProduct()));
TensorObject output_obj;
{
throw std::runtime_error("Failed to write cl buffer from cpu memory");
}
+
+ if (blocking && !_environment->queue()->WaitForCompletion().ok())
+ {
+ throw std::runtime_error("Failed to WaitForCompletion");
+ }
+
if (!_converter_from->Convert(permute_obj, output_obj).ok())
{
throw std::runtime_error("Failed to change layout");
}
}
-void ICLTensor::enqueueReadBuffer(void *ptr, bool)
+void ICLTensor::enqueueReadBuffer(void *ptr, bool blocking)
{
TensorObject input_obj;
}
TensorObject output_obj =
- MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _shape.num_elements()));
+ MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _info._shape.DimensionsProduct()));
if (!_converter_from->Convert(input_obj, permute_obj).ok())
{
{
throw std::runtime_error("Failed to read cl buffer");
}
+
+ if (blocking && !_environment->queue()->WaitForCompletion().ok())
+ {
+ throw std::runtime_error("Failed to WaitForCompletion");
+ }
}
} // namespace operand
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
#include "tensorflow/lite/delegates/gpu/cl/environment.h"
-#include "TensorBuilderHelper.h"
+#include "Utils.h"
namespace onert
{
namespace operand
{
+// Shape and descriptor of a GPU tensor, kept together so they can be stored and handed out as
+// one value.
+struct TensorInfo
+{
+ // Tensor extents as a tflite::gpu::BHWC value
+ tflite::gpu::BHWC _shape;
+ // tflite::gpu tensor descriptor that accompanies the shape
+ tflite::gpu::TensorDescriptor _desc;
+};
+
class ICLTensor : public ITensor
{
public:
ICLTensor(ICLTensor &&) = default;
ICLTensor &operator=(ICLTensor &&) = default;
- ICLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment,
- TensorType type)
- : _rank{rank}, _shape{shape}, _environment(environment), _type(type)
+ ICLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+ tflite::gpu::TensorDescriptor desc)
+ : _rank{rank}, _type(type), _info{shape, desc}
{
}
public:
uint8_t *buffer() const final { return reinterpret_cast<uint8_t *>(handle()->GetMemoryPtr()); }
- size_t total_size() const final { return _shape.num_elements() * sizeof(float); }
+ size_t total_size() const final { return _info._shape.DimensionsProduct() * sizeof(float); }
size_t calcOffset(const ir::Coordinates &) const final
{
throw std::runtime_error("ICLTensor::calcOffset() is not supported.");
throw std::runtime_error("ICLTensor::data_zero_points() is not supported.");
}
bool is_dynamic() const override { return false; }
- ir::Shape getShape() const override { return _shape; }
+ /**
+ * @brief Rebuild an ir::Shape from the stored BHWC shape, using as many dimensions as _rank
+ *
+ * Mapping from rank to the BHWC fields that are returned:
+ * 1 -> {b}, 2 -> {b, c}, 3 -> {b, w, c}, 4 -> {b, h, w, c}
+ *
+ * @return ir::Shape Shape with _rank dimensions, or an empty shape for any other rank
+ */
+ ir::Shape getShape() const override
+ {
+ tflite::gpu::BHWC shape = _info._shape;
+ switch (_rank)
+ {
+ case 1:
+ return ir::Shape{shape.b};
+ case 2:
+ return ir::Shape{shape.b, shape.c};
+ case 3:
+ return ir::Shape{shape.b, shape.w, shape.c};
+ case 4:
+ return ir::Shape{shape.b, shape.h, shape.w, shape.c};
+ default:
+ break;
+ }
+ // Ranks outside 1..4 fall through to an empty shape
+ return ir::Shape{};
+ }
bool has_padding() const override { return false; }
void access(const std::function<void(ITensor &tensor)> &fn) final;
bool needMemoryMap() const final { return true; }
void enqueueWriteBuffer(const void *ptr, bool blocking = true) final;
void enqueueReadBuffer(void *ptr, bool blocking = true) final;
- void writeConvertInit();
- void readConvertInit();
+ void writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment);
+ void readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+ std::shared_ptr<tflite::gpu::cl::Environment> environment);
+
TensorType get_type() { return _type; }
+ TensorType set_type(TensorType type) { return _type = type; }
+ const TensorInfo get_info() { return _info; }
public:
virtual const tflite::gpu::cl::Tensor *handle() const = 0;
private:
protected:
size_t _rank; // Actual rank (reflects extended rank)
- ir::Shape _shape;
- std::shared_ptr<tflite::gpu::cl::Environment> _environment;
TensorType _type;
- std::unique_ptr<tflite::gpu::TensorObjectConverterBuilder> _converter_builder;
+ TensorInfo _info;
tflite::gpu::cl::CLMemory _cl_memory;
+ std::shared_ptr<tflite::gpu::cl::Environment> _environment;
std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_to;
std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_from;
};
.operands()
.iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
.operands()
.iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchThreadPool.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+// Starts `num_threads` worker threads. Every worker is given its own index, which it later
+// passes to each job it runs.
+BatchThreadPool::BatchThreadPool(size_t num_threads) : _num_threads(num_threads), _stop_all(false)
+{
+  _worker_threads.reserve(_num_threads);
+
+  uint32_t worker_id = 0;
+  while (worker_id < _num_threads)
+  {
+    _worker_threads.emplace_back([this, worker_id]() { worker(worker_id); });
+    ++worker_id;
+  }
+}
+
+// Worker loop: take jobs from the shared queue one at a time and run them with this worker's
+// thread number until the pool is stopped and the queue has been drained.
+void BatchThreadPool::worker(uint32_t thread_num)
+{
+  for (;;)
+  {
+    std::function<void(uint32_t)> next_job;
+    {
+      std::unique_lock<std::mutex> queue_lock(_m_job_queue);
+      // Sleep until there is a job to take or the pool is shutting down
+      _cv_job_queue.wait(queue_lock, [this]() { return _stop_all || !_job_queue.empty(); });
+
+      // The wait only ends with an empty queue when the pool is stopping
+      if (_job_queue.empty())
+      {
+        return;
+      }
+
+      // Take the job at the front of the queue
+      next_job = std::move(_job_queue.front());
+      _job_queue.pop();
+    }
+
+    // Run the job with the queue lock released so that other workers can fetch jobs meanwhile
+    next_job(thread_num);
+  }
+}
+
+// Asks every worker to stop and waits for them. Workers drain the queue before exiting, so jobs
+// that were already enqueued still run.
+BatchThreadPool::~BatchThreadPool()
+{
+  {
+    // The flag must be changed while holding the queue mutex. Workers evaluate their wait
+    // predicate under this mutex; an unlocked write could land between a worker's predicate
+    // check and its going to sleep, so the notification below would be missed and join() would
+    // block forever.
+    std::lock_guard<std::mutex> lock(_m_job_queue);
+    _stop_all = true;
+  }
+  _cv_job_queue.notify_all();
+
+  for (auto &&t : _worker_threads)
+  {
+    t.join();
+  }
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
+#define __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
+
+#include <condition_variable>
+#include <functional>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+#include <vector>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+/**
+ * @brief Class that has a threadpool for batch-by-batch multi-threading
+ *
+ */
+class BatchThreadPool
+{
+public:
+ /**
+ * @brief Construct the pool and start its worker threads
+ *
+ * @param num_threads Number of worker threads to start
+ */
+ BatchThreadPool(size_t num_threads);
+
+ /**
+ * @brief Ask the workers to stop and join all of them
+ *
+ * Workers exit only once the job queue is empty, so already enqueued jobs are still run.
+ */
+ ~BatchThreadPool();
+
+ /**
+ * @brief Enqueue a job and wake up one worker to run it
+ *
+ * The job is invoked as f(thread_num, args...), where thread_num is the number of the worker
+ * thread that picked the job up. The result of the job, or an exception thrown by it, is
+ * delivered through the returned future.
+ *
+ * @tparam F Type of the function for job
+ * @tparam Args Type of arguments of job
+ * @param f Function for job
+ * @param args Arguments of job
+ * @return std::future<typename std::result_of<F(uint32_t, Args...)>::type>
+ * @throw std::runtime_error If the pool has already been asked to stop
+ */
+ template <class F, class... Args>
+ std::future<typename std::result_of<F(uint32_t, Args...)>::type> enqueueJob(F &&f,
+ Args &&... args)
+ {
+ if (_stop_all)
+ {
+ throw std::runtime_error("Stop all threads in BatchThreadPool");
+ }
+
+ using return_type = typename std::result_of<F(uint32_t, Args...)>::type;
+ // The packaged_task is held by shared_ptr so that the copyable std::function stored in the
+ // queue can own it; the first argument (thread number) is left open for the worker to fill in
+ auto job = std::make_shared<std::packaged_task<return_type(uint32_t)>>(
+ std::bind(std::forward<F>(f), std::placeholders::_1, std::forward<Args>(args)...));
+ std::future<return_type> job_result_future = job->get_future();
+ {
+ // Push job in the assigned queue
+ std::lock_guard<std::mutex> lock(_m_job_queue);
+
+ // Push job
+ _job_queue.push([job](uint32_t thread_num) { (*job)(thread_num); });
+ }
+ // Notify after the lock is released
+ _cv_job_queue.notify_one();
+
+ return job_result_future;
+ }
+
+private:
+ /**
+ * @brief Worker to run jobs
+ *
+ * @param thread_num Thread number on which worker is running
+ */
+ void worker(uint32_t thread_num);
+
+private:
+ /**
+ * @brief The number of threads
+ *
+ */
+ size_t _num_threads;
+
+ /**
+ * @brief Threads worked for jobs
+ *
+ */
+ std::vector<std::thread> _worker_threads;
+
+ /**
+ * @brief Queue for jobs
+ *
+ */
+ std::queue<std::function<void(uint32_t)>> _job_queue;
+
+ /**
+ * @brief condition_variables for _job_queue and _worker_threads
+ *
+ */
+ std::condition_variable _cv_job_queue;
+
+ /**
+ * @brief Mutex for the queue _job_queue
+ *
+ */
+ std::mutex _m_job_queue;
+
+ /**
+ * @brief Whether all threads are stopped
+ *
+ */
+ bool _stop_all;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_BATCH_THREAD_POOL_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Convert.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+// Maps an onert layout to the matching npu layout; any layout other than NCHW and NHWC is
+// rejected with std::runtime_error.
+data_layout convertDataLayout(const ir::Layout layout)
+{
+  if (layout == ir::Layout::NCHW)
+  {
+    return DATA_LAYOUT_NCHW;
+  }
+
+  if (layout == ir::Layout::NHWC)
+  {
+    return DATA_LAYOUT_NHWC;
+  }
+
+  throw std::runtime_error("Unknown Layout");
+}
+
+// Maps an onert data type to the matching npu data type; only the asymmetric uint8 and
+// symmetric int16 quantized types are accepted, anything else raises std::runtime_error.
+data_type convertDataType(const ir::DataType type)
+{
+  if (type == ir::DataType::QUANT_UINT8_ASYMM)
+  {
+    return DATA_TYPE_QASYMM8;
+  }
+
+  if (type == ir::DataType::QUANT_INT16_SYMM)
+  {
+    return DATA_TYPE_QSYMM16;
+  }
+
+  throw std::runtime_error("Unsupported data type");
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_CONVERT_H__
+#define __ONERT_BACKEND_TRIX_CONVERT_H__
+
+#include <backend/IPortableTensor.h>
+#include <ir/DataType.h>
+#include <ir/Layout.h>
+
+#include <libnpuhost.h>
+#include <type_traits>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+/**
+ * @brief Convert type of layout from onert type to npu type
+ *
+ * @param layout Layout type in onert
+ * @return data_layout Layout type in npu
+ */
+data_layout convertDataLayout(const ir::Layout layout);
+
+/**
+ * @brief Convert type of data from onert type to npu type
+ *
+ * @param type Data type in onert
+ * @return data_type Data type in npu
+ */
+data_type convertDataType(const ir::DataType type);
+
+/**
+ * @brief Set the tensors_data_info object
+ *
+ * @tparam T Type of tensor based of IPortableTensor
+ * @param tensors Tensors that have data information
+ * @param info tensors_data_info to be set
+ */
+template <typename T, std::enable_if_t<std::is_base_of<IPortableTensor, T>::value, bool> = true>
+void setDataInfo(const std::vector<T *> &tensors, tensors_data_info *info)
+{
+  const auto count = static_cast<uint32_t>(tensors.size());
+  info->num_info = count;
+
+  // Fill one entry per tensor, in the order the tensors are given
+  for (uint32_t pos = 0; pos < count; ++pos)
+  {
+    T *const tensor = tensors[pos];
+    auto &entry = info->info[pos];
+
+    entry.layout = convertDataLayout(tensor->layout());
+    entry.type = convertDataType(tensor->data_type());
+  }
+}
+
+/**
+ * @brief Set the generic_buffers object
+ *
+ * @tparam T Type of tensor based of IPortableTensor
+ * @param tensors Tensors that have buffer information
+ * @param buf generic_buffers to be set
+ */
+template <typename T, std::enable_if_t<std::is_base_of<IPortableTensor, T>::value, bool> = true>
+void setBuffers(const std::vector<T *> &tensors, generic_buffers *buf)
+{
+  const auto count = static_cast<uint32_t>(tensors.size());
+  buf->num_buffers = count;
+
+  // Each tensor is described by its buffer address and total size, as a mapped buffer
+  for (uint32_t pos = 0; pos < count; ++pos)
+  {
+    T *const tensor = tensors[pos];
+    auto &entry = buf->bufs[pos];
+
+    entry.addr = tensor->buffer();
+    entry.size = static_cast<uint64_t>(tensor->total_size());
+    entry.type = BUFFER_MAPPED;
+  }
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_CONVERT_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DevContext.h"
+
+#include "Convert.h"
+
+#include <cstdlib>
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+// Everything related to the npu device handles is gathered in this class for now. When an npu
+// daemon is implemented, the roles other than the context role should be separated out.
+//
+// Acquires a handle for every TRIX NPU device and creates the batch thread pool with one thread
+// per device. Throws std::runtime_error if no device is found or a handle cannot be acquired.
+DevContext::DevContext() : _dev_handles{}, _model_ids{}, _meta_map{}
+{
+  auto dev_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
+  if (dev_count <= 0)
+  {
+    throw std::runtime_error("Unable to find TRIX NPU device");
+  }
+
+  try
+  {
+    // Reserve up front so that storing an acquired handle cannot fail and lose the handle
+    _dev_handles.reserve(static_cast<size_t>(dev_count));
+
+    // Get NPU device handles
+    for (int i = 0; i < dev_count; ++i)
+    {
+      npudev_h handle;
+      if (getNPUdeviceByType(&handle, NPUCOND_TRIV2_CONN_SOCIP, i) < 0)
+      {
+        throw std::runtime_error("Failed to get TRIX NPU device handle");
+      }
+      _dev_handles.emplace_back(handle);
+    }
+
+    // NOTE Do not change the number of threads as long as jobs in thread call
+    //      the synchronous APIs such as submitNPU_request()
+    _batch_thread_pool = std::make_unique<BatchThreadPool>(_dev_handles.size());
+    // We need to be careful not to create multiple `BatchThreadPool`. In case of multiple models,
+    // there may be a problem having multiple `BatchThreadPool` in current implementation. But if
+    // creating the thread pool is moved to the npu daemon, this problem should be solved smoothly.
+  }
+  catch (...)
+  {
+    // The destructor does not run when the constructor throws, so the handles acquired so far
+    // have to be given back here. No model has been registered yet at this point.
+    for (const auto &dev_handle : _dev_handles)
+    {
+      putNPUdevice(dev_handle);
+    }
+    _dev_handles.clear();
+    throw;
+  }
+}
+
+// Releases the thread pool, then unregisters all models from and releases every device handle.
+DevContext::~DevContext()
+{
+  // NOTE The thread pool must go first: destroying it waits for all of its threads to terminate,
+  //      and only after that may the device handles be released
+  _batch_thread_pool.reset();
+
+  for (auto handle_it = _dev_handles.cbegin(); handle_it != _dev_handles.cend(); ++handle_it)
+  {
+    unregisterNPUmodel_all(*handle_it);
+    putNPUdevice(*handle_it);
+  }
+}
+
+// Registers the model file on every NPU device and records, per device, the ID the device
+// assigned to it. The ID assigned by device 0 is used as the internal model ID.
+// Throws std::runtime_error if the metadata cannot be read or a device rejects the model.
+ModelID DevContext::registerModel(const std::string &model_file_path)
+{
+  // The metadata block used to be released with free() by its previous owner, so the shared
+  // owner must free() it as well instead of using the default `delete`. Taking ownership right
+  // away also releases it when registration fails below.
+  std::shared_ptr<npubin_meta> meta(getNPUmodel_metadata(model_file_path.c_str(), false),
+                                    [](npubin_meta *ptr) { std::free(ptr); });
+
+  if (meta == nullptr)
+  {
+    throw std::runtime_error("Unable to extract the model metadata");
+  }
+
+  generic_buffer file_info;
+  file_info.type = BUFFER_FILE;
+  file_info.filepath = model_file_path.c_str();
+  file_info.size = meta->size;
+
+  ModelID model_id = 0;
+
+  for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
+  {
+    // Register model for each device
+    uint32_t model_id_at_device;
+    if (registerNPUmodel(_dev_handles.at(dev_num), &file_info, &model_id_at_device) < 0)
+    {
+      throw std::runtime_error("Failed to register npu model");
+    }
+
+    if (dev_num == 0)
+    {
+      // The ID from the first device doubles as the internal model ID
+      model_id = model_id_at_device;
+    }
+
+    // Every per-device ID shares the same metadata object
+    _meta_map[model_id_at_device] = meta;
+
+    _model_ids[model_id].resize(dev_num + 1);
+    _model_ids[model_id].at(dev_num) = model_id_at_device;
+  }
+
+  // Return the model id for device 0 only
+  return model_id;
+}
+
+// Unregisters the model from every device it was registered on and drops its bookkeeping.
+// An ID that is not registered is ignored, so this is safe to call from a destructor.
+void DevContext::unRegisterModel(ModelID model_id)
+{
+  const auto found = _model_ids.find(model_id);
+  if (found == _model_ids.end())
+  {
+    // Nothing was registered under this ID (or it has been unregistered already)
+    return;
+  }
+
+  // Walk the IDs that were actually recorded: after a registration that failed part-way the
+  // list can be shorter than the device list
+  const auto &ids_at_devices = found->second;
+  for (uint32_t dev_num = 0; dev_num < ids_at_devices.size(); ++dev_num)
+  {
+    const auto model_id_at_device = ids_at_devices.at(dev_num);
+    const auto &dev_handle = _dev_handles.at(dev_num);
+
+    // Remove meta data
+    _meta_map.erase(model_id_at_device);
+
+    // Unregister Model for each device
+    unregisterNPUmodel(dev_handle, model_id_at_device);
+  }
+
+  // Remove model IDs
+  _model_ids.erase(found);
+}
+
+// Runs the model once when batch_size <= 1. Otherwise the single input and the single output
+// buffer are split into batch_size equal slices and one job per slice is run on the thread
+// pool. The first failure (in batch order) is rethrown after every job has finished.
+void DevContext::requestRun(ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info,
+                            output_buffers *output_bufs, tensors_data_info *out_info,
+                            size_t batch_size)
+{
+  if (batch_size > 1)
+  {
+    if (in_info->num_info != 1)
+    {
+      throw std::runtime_error("Supported only an input that has batch now");
+    }
+    if (out_info->num_info != 1)
+    {
+      throw std::runtime_error("Supported only one output now");
+    }
+
+    if (input_bufs->bufs[0].size % batch_size != 0)
+    {
+      throw std::runtime_error("Invalid batch size. batch size :" + std::to_string(batch_size) +
+                               ", input buffer size : " + std::to_string(input_bufs->bufs[0].size));
+    }
+
+    if (output_bufs->bufs[0].size % batch_size != 0)
+    {
+      throw std::runtime_error(
+        "Invalid batch size. batch size :" + std::to_string(batch_size) +
+        ", output tensor size : " + std::to_string(output_bufs->bufs[0].size));
+    }
+
+    // First failure seen by this call. It is only touched by the calling thread: a job does not
+    // catch its exceptions but lets its future carry them, so the workers share no error state.
+    std::exception_ptr first_error;
+
+    // Run on thread pool
+    std::vector<std::future<int32_t>> batch_futures;
+    batch_futures.reserve(batch_size);
+    try
+    {
+      for (uint32_t batch_num = 0; batch_num < batch_size; ++batch_num)
+      {
+        // Enqueue jobs
+        // The in_info and out_info are always the same even if they are divided by batch, so
+        // they are used as they are.
+        auto future = _batch_thread_pool->enqueueJob(
+          [batch_size, in_info, out_info,
+           this](uint32_t dev_num, ModelID model_id, const input_buffers *input_bufs,
+                 const output_buffers *output_bufs, uint32_t batch_num) -> int32_t {
+            // Set buffers of inputs/outputs for each batch
+            // TODO Support multiple inputs/outputs
+            input_buffers in_batch_buffers;
+            in_batch_buffers.num_buffers = input_bufs->num_buffers;
+            const uint64_t in_batch_offset = input_bufs->bufs[0].size / batch_size;
+            setBufferByBatch(input_bufs->bufs[0], batch_num, in_batch_offset,
+                             &in_batch_buffers.bufs[0]);
+
+            output_buffers out_batch_buffers;
+            out_batch_buffers.num_buffers = output_bufs->num_buffers;
+            const uint64_t out_batch_offset = output_bufs->bufs[0].size / batch_size;
+            setBufferByBatch(output_bufs->bufs[0], batch_num, out_batch_offset,
+                             &out_batch_buffers.bufs[0]);
+
+            // dev_num is the same as the thread number in _batch_thread_pool
+            this->runOneBatch(dev_num, model_id, &in_batch_buffers, in_info, &out_batch_buffers,
+                              out_info);
+
+            return batch_num;
+          },
+          model_id, input_bufs, output_bufs, batch_num);
+        batch_futures.emplace_back(std::move(future));
+      }
+    }
+    catch (...)
+    {
+      // Enqueueing failed part-way; the jobs that were queued are still waited for below
+      first_error = std::current_exception();
+    }
+
+    // Every queued job refers to buffers owned by the caller, so all of them have to finish
+    // before this function may return or throw.
+    for (auto &&future : batch_futures)
+    {
+      try
+      {
+        future.get();
+      }
+      catch (...)
+      {
+        if (!first_error)
+        {
+          first_error = std::current_exception();
+        }
+      }
+    }
+
+    if (first_error)
+    {
+      std::rethrow_exception(first_error);
+    }
+  }
+  else
+  {
+    runOneBatch(0, model_id, input_bufs, in_info, output_bufs, out_info);
+  }
+}
+
+// Creates an NPU request for the model on the given device, attaches the buffers, submits it
+// and removes the request again. Throws std::runtime_error when the segment counts do not match
+// the model metadata, when any npu API call fails, or when the submission does not return
+// within the timeout.
+void DevContext::runOneBatch(uint32_t dev_num, ModelID model_id, input_buffers *input_bufs,
+                             tensors_data_info *in_info, output_buffers *output_bufs,
+                             tensors_data_info *out_info)
+{
+  const auto &model_id_at_device = _model_ids.at(model_id).at(dev_num);
+
+  const auto meta = _meta_map.at(model_id_at_device);
+  if (meta->input_seg_num != in_info->num_info)
+  {
+    throw std::runtime_error("The number of inputs does not match to model input seg num");
+  }
+
+  if (meta->output_seg_num != out_info->num_info)
+  {
+    throw std::runtime_error("The number of outputs does not match to model output seg num");
+  }
+
+  const auto &dev_handle = _dev_handles.at(dev_num);
+  int req_id;
+
+  if (auto error_code = createNPU_request(dev_handle, model_id_at_device, &req_id))
+  {
+    throw std::runtime_error("Unable to create NPU request with model id (" +
+                             std::to_string(model_id_at_device) + ")" +
+                             " error code : " + std::to_string(error_code));
+  }
+
+  if (auto error_code =
+        setNPU_requestData(dev_handle, req_id, input_bufs, in_info, output_bufs, out_info))
+  {
+    removeNPU_request(dev_handle, req_id);
+    // Worded differently from the createNPU_request failure above so the two can be told apart
+    throw std::runtime_error("Unable to set NPU request data for model id (" +
+                             std::to_string(model_id_at_device) + ")" +
+                             " error code : " + std::to_string(error_code));
+  }
+
+  // NOTE submitNPU_request is not thread-safe(?). It is rarely hanging(unresponsive).
+  //      Ultimately, to solve this problem, we have to either use other thread-safe API or
+  //      change submitNPU_request to be thread-safe, but both works take time.
+  //      As a workaround, let's allow hanging thread.
+  // TODO Change submitNPU_request to be thread-safe or replaced with other thread-safe API
+  std::packaged_task<int(npudev_h, int)> task(submitNPU_request);
+  auto f = task.get_future();
+  std::thread thread_submit_request(std::move(task), dev_handle, req_id);
+
+  // Wait for a duration rather than until a wall-clock time point, so that adjustments of the
+  // system clock cannot shorten or stretch the timeout
+  const auto submit_timeout = std::chrono::seconds(60);
+  auto status = f.wait_for(submit_timeout);
+  if (status == std::future_status::timeout)
+  {
+    // There is no way to terminate hanging submitNPU_request from the outside.
+    // If a hanging thread is detached, it will remain as a hanging thread. Even so, it's better
+    // than having the main thread hanging.
+    thread_submit_request.detach();
+
+    // TODO Enable removeNPU_request after resolving hanging.
+    // removeNPU_request(dev_handle, req_id);
+    throw std::runtime_error("The npu API \"submitNPU_request\" timeout");
+  }
+
+  auto error_code = f.get();
+  thread_submit_request.join();
+  if (error_code != 0)
+  {
+    removeNPU_request(dev_handle, req_id);
+    throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
+                             ")" + " error code : " + std::to_string(error_code));
+  }
+
+  if (auto error_code = removeNPU_request(dev_handle, req_id))
+  {
+    throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) +
+                             ")" + " error code : " + std::to_string(error_code));
+  }
+}
+
+// Describes batch number `batch_num` of `origin_buf` in `batch_buf`: a mapped buffer of
+// `batch_offset` bytes that starts `batch_num * batch_offset` bytes after the original address.
+void DevContext::setBufferByBatch(const generic_buffer &origin_buf, uint32_t batch_num,
+                                  uint64_t batch_offset, generic_buffer *batch_buf)
+{
+  auto *const base_addr = reinterpret_cast<uint8_t *>(origin_buf.addr);
+  const auto byte_shift = batch_num * batch_offset;
+
+  batch_buf->addr = base_addr + byte_shift;
+  batch_buf->size = batch_offset;
+  batch_buf->type = BUFFER_MAPPED;
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
#ifndef __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
#define __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
+#include "BatchThreadPool.h"
+
#include <libnpuhost.h>
+#include <memory>
+#include <string>
+#include <unordered_map>
namespace onert
{
namespace trix
{
+using ModelID = uint32_t;
+
+/**
+ * @brief NPU device context of trix backend
+ *
+ */
class DevContext
{
public:
- DevContext()
- {
- auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
- // TODO: x64 platform has 3 cores. We do not support more that 2 cores for now.
- if (device_count > 2)
- {
- device_count = 2;
- }
-
- if (device_count <= 0)
- {
- throw std::runtime_error("Unable to find TRIX NPU device");
- }
-
- for (int i = 0; i < device_count; i++)
- {
- npudev_h h;
- if (getNPUdeviceByType(&h, NPUCOND_TRIV2_CONN_SOCIP, i) < 0)
- {
- throw std::runtime_error("Failed to get TRIX NPU device handle");
- }
- _dev_handles.push_back(h);
- }
- }
-
- ~DevContext()
- {
- for (auto h : _dev_handles)
- {
- if (h != nullptr)
- {
- unregisterNPUmodel_all(h);
- putNPUdevice(h);
- }
- }
- }
-
- npudev_h getDev(int i) { return _dev_handles[i]; }
- int getDevSize() { return _dev_handles.size(); }
-
- template <typename T> void setDataInfo(tensors_data_info *info, std::vector<T *> &tensors)
- {
- info->num_info = static_cast<uint32_t>(tensors.size());
-
- for (uint32_t idx = 0; idx < info->num_info; ++idx)
- {
- info->info[idx].layout = convertDataLayout(tensors[idx]->layout());
- info->info[idx].type = convertDataType(tensors[idx]->data_type());
- }
- }
-
- template <typename T>
- void setBuffer(generic_buffers *buf, std::vector<T *> &tensors, int batch_size, int batch_index)
- {
- buf->num_buffers = static_cast<uint32_t>(tensors.size());
-
- for (uint32_t idx = 0; idx < buf->num_buffers; ++idx)
- {
- buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size() / batch_size);
- buf->bufs[idx].addr = tensors[idx]->buffer() + (batch_index * buf->bufs[idx].size);
- buf->bufs[idx].type = BUFFER_MAPPED;
- }
- }
+ /**
+ * @brief Construct a new device Context object
+ *
+ */
+ DevContext();
+
+ /**
+ * @brief Destroy the device Context object
+ *
+ */
+ ~DevContext();
+
+ DevContext(const DevContext &) = delete;
+ DevContext &operator=(const DevContext &) = delete;
+
+ /**
+ * @brief Register a trix model for all NPU devices
+ *
+ * @param model_file_path File path of a trix model
+ * @return ModelID Internal ID of the trix model
+ */
+ ModelID registerModel(const std::string &model_file_path);
+
+ /**
+ * @brief Unregister a trix model
+ *
+ * @param model_id Internal ID of the trix model to be unregistered
+ */
+ void unRegisterModel(ModelID model_id);
+
+ /**
+ * @brief Request a trix model to be run on NPU
+ *
+ * @param model_id Internal ID of a trix model
+ * @param input_bufs Buffer data of inputs
+ * @param in_info Data info of inputs
+ * @param output_bufs Buffer data of outputs
+ * @param out_info data info of outputs
+ * @param batch_size Batch size
+ */
+ void requestRun(ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info,
+ output_buffers *output_bufs, tensors_data_info *out_info, size_t batch_size);
private:
- data_layout convertDataLayout(const ir::Layout layout)
- {
- switch (layout)
- {
- case ir::Layout::NCHW:
- return DATA_LAYOUT_NCHW;
- case ir::Layout::NHWC:
- return DATA_LAYOUT_NHWC;
- default:
- throw std::runtime_error("Unknown Layout");
- }
- }
-
- data_type convertDataType(const ir::DataType type)
- {
- switch (type)
- {
- case ir::DataType::QUANT_UINT8_ASYMM:
- return DATA_TYPE_QASYMM8;
- case ir::DataType::QUANT_INT16_SYMM:
- return DATA_TYPE_QSYMM16;
- default:
- throw std::runtime_error("Unsupported data type");
- }
- }
+ /**
+ * @brief Request one batch of a trix model to be run on a device of NPU
+ *
+ * @param dev_num Device number
+ * @param model_id Internal ID of a trix model
+ * @param input_bufs Buffer data of inputs
+ * @param in_info Data info of inputs
+ * @param output_bufs Buffer data of outputs
+ * @param out_info data info of outputs
+ */
+ void runOneBatch(uint32_t dev_num, ModelID model_id, input_buffers *input_bufs,
+ tensors_data_info *in_info, output_buffers *output_bufs,
+ tensors_data_info *out_info);
+
+ /**
+ * @brief Set the buffer object by batch
+ *
+ * @param origin_buf Buffer object that has all batches
+ * @param batch_num Batch number
+ * @param batch_offset Size of a batch
+ * @param batch_buf One batch buffer object to be set
+ */
+ void setBufferByBatch(const generic_buffer &origin_buf, uint32_t batch_num, uint64_t batch_offset,
+ generic_buffer *batch_buf);
private:
- // NPU device handles
+ /**
+ * @brief NPU device handles
+ *
+ */
std::vector<npudev_h> _dev_handles;
+
+ /**
+ * @brief Threadpool for batch-by-batch multi-threading
+ *
+ */
+ std::unique_ptr<BatchThreadPool> _batch_thread_pool;
+
+ // TODO Change key to internal trix model context(?) if it is needed
+ /**
+ * @brief Map for ID of models
+ * Internal Model ID : Model ID array for each device
+ *
+ */
+ std::unordered_map<ModelID, std::vector<uint32_t>> _model_ids;
+
+ /**
+ * @brief Map for meta data
+ * Model ID at each device : meta data
+ *
+ */
+ std::unordered_map<uint32_t, std::shared_ptr<npubin_meta>> _meta_map;
+
+ /**
+ * @brief Exception pointer captured within threads
+ *
+ */
+ std::exception_ptr _eptr;
};
} // namespace trix
using ir::operation::Bulk;
std::vector<IPortableTensor *> output_tensors;
- for (auto &ofm_idx : node.getOutputs())
+ for (const auto &ofm_idx : node.getOutputs())
output_tensors.emplace_back(_tensor_reg->getPortableTensor(ofm_idx));
std::vector<const IPortableTensor *> input_tensors;
- for (auto &ifm_idx : node.getInputs())
+ for (const auto &ifm_idx : node.getInputs())
input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
// parameters
*/
#include "BulkLayer.h"
-#include <util/logging.h>
-#include <libnpuhost.h>
-#include <future>
+#include "../Convert.h"
namespace onert
{
namespace ops
{
-BulkLayer::BulkLayer() : _inputs(), _outputs(), _model_id(0), _meta(nullptr), _dev_context(nullptr)
+BulkLayer::BulkLayer() : _inputs(), _outputs(), _model_id(0), _dev_context(nullptr)
{
// DO NOTHING
}
-BulkLayer::~BulkLayer() { free(_meta); }
+// Unregisters the model this layer registered, if it was ever configured.
+BulkLayer::~BulkLayer()
+{
+  // _dev_context stays nullptr until configure() has run; there is nothing to unregister then
+  if (_dev_context == nullptr)
+  {
+    return;
+  }
+
+  try
+  {
+    _dev_context->unRegisterModel(_model_id);
+  }
+  catch (...)
+  {
+    // An exception must not leave a destructor (that would terminate the program). Unregistering
+    // is best effort here, e.g. when configure() failed before the model was registered.
+  }
+}
void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
std::vector<IPortableTensor *> &outputs, std::string binary_path,
_inputs = inputs;
_outputs = outputs;
_dev_context = dev_context;
-
- _meta = getNPUmodel_metadata(binary_path.c_str(), false);
- if (_meta == nullptr)
- {
- throw std::runtime_error("Unable to extract the model metadata");
- }
-
- _model_id.resize(_dev_context->getDevSize());
-
- generic_buffer model_file;
- model_file.type = BUFFER_FILE;
- model_file.filepath = binary_path.c_str();
- model_file.size = _meta->size;
-
- for (int i = 0; i < _dev_context->getDevSize(); i++)
- {
- if (registerNPUmodel(dev_context->getDev(i), &model_file, &_model_id[i]) < 0)
- {
- throw std::runtime_error("Failed to register npu model");
- }
- }
-}
-
-void single_job(npudev_h dev, int req_id, input_buffers *input_buf, tensors_data_info *in_info,
- output_buffers *output_buf, tensors_data_info *out_info)
-{
- if (setNPU_requestData(dev, req_id, input_buf, in_info, output_buf, out_info))
- {
- throw std::runtime_error("Unable to create NPU request for red_id (" + std::to_string(req_id) +
- ")");
- }
-
- if (submitNPU_request(dev, req_id))
- {
- throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
- ")");
- }
+ _model_id = _dev_context->registerModel(binary_path);
}
void BulkLayer::run()
{
- // TODO: Remove too many assumption
- // We assume user wants batch execution if user's input size is multiples of model's input size
- int user_input_batch = (_inputs[0]->get_info().shape()).dim(0);
- int model_input_batch = _meta->input_seg_dims[0][0];
- int batch_size = user_input_batch / model_input_batch;
- bool is_batch_execution = (batch_size != 1 ? true : false);
-
- std::vector<int> req_id(_dev_context->getDevSize());
-
- for (int i = 0; i < _dev_context->getDevSize(); i++)
- {
- if (createNPU_request(_dev_context->getDev(i), _model_id[i], &req_id[i]))
- {
- throw std::runtime_error("Unable to create NPU request with model id (" +
- std::to_string(_model_id[i]) + ")");
- }
- }
-
- if (_meta->input_seg_num != _inputs.size())
- {
- throw std::runtime_error("input size does not match to model input seg num");
- }
-
- if (_meta->output_seg_num != _outputs.size())
- {
- throw std::runtime_error("output size does not match to model output seg num");
- }
-
tensors_data_info in_info;
tensors_data_info out_info;
- _dev_context->setDataInfo<const IPortableTensor>(&in_info, _inputs);
- _dev_context->setDataInfo<IPortableTensor>(&out_info, _outputs);
+ setDataInfo(_inputs, &in_info);
+ setDataInfo(_outputs, &out_info);
- std::vector<input_buffers> input_buf;
- std::vector<output_buffers> output_buf;
- input_buf.resize(_dev_context->getDevSize());
- output_buf.resize(_dev_context->getDevSize());
-
- std::vector<std::future<void>> f(_dev_context->getDevSize());
-
- const int num_cores = _dev_context->getDevSize();
- if (is_batch_execution)
- {
- // TODO: Support for general number of cores(>2)
- // Here we assume that 2 trix cores
- for (int i = 0; i < (batch_size); i = i + num_cores)
- {
- for (int core = 0; core < num_cores; core++)
- {
- _dev_context->setBuffer<const IPortableTensor>(&input_buf[core], _inputs, batch_size,
- i + core);
- _dev_context->setBuffer<IPortableTensor>(&output_buf[core], _outputs, batch_size, i + core);
- }
- for (int core = 0; core < num_cores; core++)
- {
-
- if (i + core < batch_size)
- {
- f[core] =
- std::async(std::launch::async, &single_job, _dev_context->getDev(core), req_id[core],
- &input_buf[core], &in_info, &output_buf[core], &out_info);
- }
- }
- for (int core = 0; core < num_cores; core++)
- {
- f[core].wait();
- }
- }
- }
- else
- {
- _dev_context->setBuffer<const IPortableTensor>(&input_buf[0], _inputs, batch_size, 0);
- _dev_context->setBuffer<IPortableTensor>(&output_buf[0], _outputs, batch_size, 0);
-
- single_job(_dev_context->getDev(0), req_id[0], &input_buf[0], &in_info, &output_buf[0],
- &out_info);
- }
+ input_buffers input_bufs;
+ output_buffers output_bufs;
+ setBuffers(_inputs, &input_bufs);
+ setBuffers(_outputs, &output_bufs);
- for (int i = 0; i < _dev_context->getDevSize(); i++)
+ size_t batch_size = 1;
+ // TODO Remove this assumption
+ if (_inputs.size() == 1 && _outputs.size() == 1 && _inputs.at(0)->getShape().dim(0) > 1)
{
- if (removeNPU_request(_dev_context->getDev(i), req_id[i]))
- {
- throw std::runtime_error("Unable to remove NPU request with req id (" +
- std::to_string(req_id[i]) + ")");
- }
+ batch_size = _inputs.at(0)->getShape().dim(0);
}
+ _dev_context->requestRun(_model_id, &input_bufs, &in_info, &output_bufs, &out_info, batch_size);
}
void BulkLayer::prepare()
std::vector<const IPortableTensor *> _inputs;
std::vector<IPortableTensor *> _outputs;
- std::vector<uint32_t> _model_id;
- npubin_meta *_meta;
+ ModelID _model_id;
std::shared_ptr<DevContext> _dev_context;
};
.operands()
.iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
target_link_libraries(${TEST_ONERT_CORE} gtest gtest_main dl ${LIB_PTHREAD})
add_test(${TEST_ONERT_CORE} ${TEST_ONERT_CORE})
-install(TARGETS ${TEST_ONERT_CORE} DESTINATION unittest_standalone)
+install(TARGETS ${TEST_ONERT_CORE} DESTINATION unittest)
tensor_builder->notifyFirstUse(ind);
}
- for (auto &pair : def_map)
+ for (const auto &pair : def_map)
{
- if (pair.second == 0)
- tensor_builder->notifyFirstUse(pair.first);
+ const auto &ind = pair.first;
+ const auto def_count = pair.second;
+ if (def_count == 0)
+ tensor_builder->notifyFirstUse(ind);
}
// This is a workaround to keep the operands over the execution
// (the operands look like they are unused)
std::vector<ir::OperandIndex> operands_last_until_end;
- for (auto &pair : uses_map)
+ for (const auto &pair : uses_map)
{
- if (pair.second == 0)
- operands_last_until_end.push_back(pair.first);
+ const auto &ind = pair.first;
+ const auto use_count = pair.second;
+ if (use_count == 0)
+ operands_last_until_end.push_back(ind);
}
// At each operation,
}
}
- for (auto &ind : operands_last_until_end)
+ for (const auto &ind : operands_last_until_end)
{
tensor_builder->notifyLastUse(ind);
}
#ifndef __ONERT_COMPILER_COMPILE_H_
#define __ONERT_COMPILER_COMPILE_H_
+#include "CompilerOptions.h"
+#include "ICompiler.h"
#include "ir/NNPkg.h"
-#include "exec/Executors.h"
-#include "util/TracingCtx.h"
namespace onert
{
-
namespace compiler
{
-enum class State
-{
- CREATED, // Before compilation
- COMPILED // Success compilation
-};
-
-struct ManualSchedulerOptions
-{
-public:
- void setBackendMap(const std::string &str);
-
-public:
- std::string backend_for_all;
- std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
- std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
-};
-
-struct PartialGraphOptions
-{
- std::unordered_map<ir::OperationIndex, ir::SubgraphIndex> index_to_graph;
-};
-
-class CompilerOptions
-{
-public:
- // Set default values for CompilerOptions
- // All these default values should not be fetched from Env, when we stop supporting Android NNAPI.
- static std::unique_ptr<CompilerOptions> fromGlobalConfig();
-
-public:
- // GENERAL OPTIONS
- std::vector<std::string> backend_list;
-
- // OPTIONS ONLY FOR DEBUGGING/PROFILING
- std::string trace_filepath; //< File path to save trace records
- int graph_dump_level; //< Graph dump level, values between 0 and 2 are valid
- std::string executor; //< Executor name to use
- ManualSchedulerOptions manual_scheduler_options; //< Options for ManualScheduler
- bool he_scheduler; //< HEScheduler if true, ManualScheduler otherwise
- bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
- bool disable_compile; //< Run with Interpreter if true, try compilation otherwise
- bool fp16_enable; //< Whether fp16 mode ON/OFF
- PartialGraphOptions partial_graph_options;
-};
-
-struct CompilerArtifact
-{
- CompilerArtifact(void) = delete;
- CompilerArtifact(std::shared_ptr<exec::Executors> executors,
- std::unique_ptr<const util::TracingCtx> tracing_ctx)
- : _executors{executors}, _tracing_ctx{std::move(tracing_ctx)} {};
-
- std::shared_ptr<exec::Executors> _executors;
- std::unique_ptr<const util::TracingCtx> _tracing_ctx;
-};
-
/**
* @brief Class to compile NN package
*/
-class Compiler
+class Compiler : public ICompiler
{
public:
/**
Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
std::vector<std::unique_ptr<CompilerOptions>> &copts);
-public:
/**
- * @brief Do compilation with the options
- *
- * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+ * @brief Destroy the Compiler object
*/
- std::shared_ptr<CompilerArtifact> compile(void);
+ ~Compiler() = default;
+public:
/**
* @brief Do compilation with the options
*
- * @return std::vector<std::shared_ptr<CompilerArtifact>> Executors as a result of compilation
- * for pipeline
- */
- std::vector<std::shared_ptr<CompilerArtifact>> compile(const char *package_file_path,
- const char *map_file_path);
-
- State state(void) const { return _state; }
-
- /**
- * @brief Allow to compute float32 using float16 data type
- */
- void enableToFp16();
-
- /**
- * @brief Build the partial graphs to compile with original graph
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
*/
- bool buildPartialGraph(uint32_t num_graphs);
-
-private:
- void checkProfilerConditions();
- std::shared_ptr<ir::Graph> &primary_subgraph()
- {
- return _nnpkg->primary_model()->at(ir::SubgraphIndex{0});
- }
+ std::shared_ptr<CompilerArtifact> compile(void);
private:
- std::shared_ptr<ir::NNPkg> _nnpkg;
- // NOTE These executors does not have duplicated subgraph. This mean they do not allow support
- // subgraphs being called recursively because data of non-constant tensor of parent executor will
- // be updated by child executor. If you want to support subgraphs being called recursively, you
- // have to add allocate non-constant tensor memory of executors in execution time when each
- // subgraph is called.
- State _state;
- std::vector<CompilerOptions *> _voptions;
+ std::shared_ptr<ir::Model> _model;
+ CompilerOptions *_options;
};
} // namespace compiler
-
} // namespace onert
#endif // __ONERT_COMPILER_COMPILE_H_
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_COMPILER_FACTORY_H__
+#define __ONERT_COMPILER_COMPILER_FACTORY_H__
+
+#include "ICompiler.h"
+#include "CompilerOptions.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+// TODO Support register and use compiler plugin
+/**
+ * @brief Factory that creates ICompiler objects for an NN package
+ *
+ * @note The constructor is private, so the factory can only be reached through get()
+ */
+class CompilerFactory
+{
+public:
+ /**
+ * @brief Get the factory object
+ * @return Reference to a CompilerFactory
+ *
+ * @note NOTE(review): presumably a single shared instance; the definition is not
+ * part of this header - confirm against the implementation
+ */
+ static CompilerFactory &get();
+
+public:
+ /**
+ * @brief Create a compiler for the given package and options
+ * @param[in] nnpkg NN package to compile
+ * @param[in] copts Compiler options. Passed by non-const reference; whether the
+ * elements are consumed is not visible from this declaration
+ * @return Compiler object whose ownership is handed to the caller
+ */
+ std::unique_ptr<ICompiler> create(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts);
+
+private:
+ // It is not allowed to use CompilerFactory without get()
+ CompilerFactory() = default;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_COMPILER_FACTORY_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_COMPILER_OPTIONS_H_
+#define __ONERT_COMPILER_COMPILER_OPTIONS_H_
+
+#include "ir/OpCode.h"
+#include "ir/Index.h"
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Options for ManualScheduler: string-valued backend assignments
+ */
+struct ManualSchedulerOptions
+{
+public:
+ /**
+ * @brief Set backend mapping from a string
+ * @param[in] str Encoded mapping. The expected format is defined by the
+ * implementation, which is not part of this header
+ */
+ void setBackendMap(const std::string &str);
+
+public:
+ // NOTE(review): presumably the backend applied to all operations unless one of
+ // the maps below overrides it - confirm against the scheduler
+ std::string backend_for_all;
+ // String (presumably a backend name) keyed by operation code
+ std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
+ // String (presumably a backend name) keyed by operation index
+ std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
+};
+
+/**
+ * @brief Options that control a compilation
+ *
+ * @note The int/bool members have no default member initializers, so a
+ * default-constructed object leaves them uninitialized until they are assigned
+ */
+class CompilerOptions
+{
+public:
+ /**
+ * @brief Set default values for CompilerOptions
+ * @return Generated CompilerOptions
+ *
+ * @note All these default values should not be fetched from Env
+ * when we stop supporting Android NNAPI.
+ */
+ static std::unique_ptr<CompilerOptions> fromGlobalConfig();
+
+ /**
+ * @brief Allow to compute float32 using float16 data type
+ *
+ * @note Sets fp16_enable to true
+ */
+ void enableToFp16() { fp16_enable = true; }
+
+ /**
+ * @brief Force default values of CompilerOptions for correct compilations
+ *
+ * @note This should be called after CompilerOptions setting is finished
+ * to prevent value overwriting
+ */
+ void forceInternalOptions();
+
+ /**
+ * @brief Print option values
+ */
+ void verboseOptions();
+
+public:
+ // GENERAL OPTIONS
+ std::vector<std::string> backend_list;
+
+ // OPTIONS ONLY FOR DEBUGGING/PROFILING
+ std::string trace_filepath; //< File path to save trace records
+ int graph_dump_level; //< Graph dump level, values between 0 and 2 are valid
+ std::string executor; //< Executor name to use
+ ManualSchedulerOptions manual_scheduler_options; //< Options for ManualScheduler
+ bool he_scheduler; //< HEScheduler if true, ManualScheduler otherwise
+ bool he_profiling_mode; //< Whether HEScheduler profiling mode ON/OFF
+ bool fp16_enable; //< Whether fp16 mode ON/OFF
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_COMPILER_OPTIONS_H_
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ICompiler.h
+ * @brief This file contains ICompiler class to define and run compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_I_COMPILER_H_
+#define __ONERT_COMPILER_I_COMPILER_H_
+
+#include "exec/IExecutors.h"
+#include "util/TracingCtx.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Result of a compilation: the executor set and its tracing context
+ */
+struct CompilerArtifact
+{
+  CompilerArtifact(void) = delete;
+
+  /**
+   * @brief Construct an artifact from the products of a compilation
+   * @param[in] executors   Executor set, shared with the artifact
+   * @param[in] tracing_ctx Tracing context, ownership is transferred to the artifact
+   */
+  CompilerArtifact(std::shared_ptr<exec::IExecutors> executors,
+                   std::unique_ptr<const util::TracingCtx> tracing_ctx)
+    // Both parameters are taken by value, so move them into the members;
+    // copying the shared_ptr would only add a redundant refcount round trip.
+    : _executors{std::move(executors)}, _tracing_ctx{std::move(tracing_ctx)}
+  {
+  }
+
+  std::shared_ptr<exec::IExecutors> _executors;
+  std::unique_ptr<const util::TracingCtx> _tracing_ctx;
+};
+
+/**
+ * @brief Interface of a compiler: defines and runs the compilation phase
+ */
+class ICompiler
+{
+public:
+ /**
+ * @brief Virtual ICompiler destructor
+ * @note Require derived class destructor
+ */
+ virtual ~ICompiler() = default;
+
+ /**
+ * @brief Do compilation
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+ */
+ virtual std::shared_ptr<CompilerArtifact> compile(void) = 0;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_I_COMPILER_H_
{
public:
LoweredGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
- LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph,
- const compiler::CompilerOptions &options);
ir::Graph &graph() { return _graph; }
const ir::Graph &graph() const { return _graph; }
- ir::Graph &parent_graph() { return _parent_graph; }
- const ir::Graph &parent_graph() const { return _parent_graph; }
const compiler::GraphLowerInfo &lower_info() const { return _lower_info_map; }
compiler::GraphLowerInfo &lower_info() { return _lower_info_map; }
std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
* It allows the original graph can be compiled multiple times.
*/
ir::Graph _graph;
- ir::Graph _parent_graph;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
compiler::GraphLowerInfo _lower_info_map;
ir::OperationIndexMap<bool> _has_dynamic_tensor_map;
void dump();
+ /**
+ * @brief Create a lowered model shape inferer map
+ * @param[in] lowered_subgs lowered model subgraph map
+ * @return Shape inferer map
+ */
+ static std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>>
+ createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<LoweredGraph>> &lowered_subgs);
+
private:
bool checkDynamicInput(const ir::Operation &op);
bool checkDynamicOutput(const ir::Operation &op);
#define __ONERT_EXEC_EXECUTION_H__
#include "ir/Layout.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
#include "IODescription.h"
#include <thread>
* @brief Construct a new Execution object
* @param[in] executor Model executor
*/
- Execution(const std::shared_ptr<Executors> &executors);
+ Execution(const std::shared_ptr<IExecutors> &executors);
public:
/**
* @brief Returns primary graph object
* @return Graph object
*/
- const ir::Graph &primary_subgraph() const { return primary_executor()->graph(); }
+ const ir::Graph &primary_subgraph() const { return entryExecutor()->graph(); }
- const ir::Graph &primary_parentgraph() const { return primary_executor()->parent_graph(); }
/**
* @brief Change input shape
* @param[in] index Input index
ir::Shape getInputShape(ir::IOIndex ind) const;
ir::Shape getOutputShape(ir::IOIndex ind) const;
- //
- // Experimental API
- //
-
- // accessor
- std::vector<
- std::tuple<std::shared_ptr<onert::exec::Execution>, onert::ir::IOIndex, onert::ir::IOIndex>>
- getNextExes()
- {
- return next_exes;
- }
- std::deque<std::pair<IODescription *, uint32_t>> *getAsyncIoDescs() { return &_async_io_descs; }
- std::deque<std::vector<void *>> *getAsyncResults() { return &_async_results; }
-
- /**
- * @brief Push IO information between related executions into next_exes
- * @param[in] next address of next execution
- * @param[in] o_index Output index of current execution (it will be the input of next execution)
- * @param[in] i_index Input index of next execution
- */
- void pushNextExe(std::shared_ptr<onert::exec::Execution> next, onert::ir::IOIndex o_index,
- onert::ir::IOIndex i_index)
- {
- next_exes.push_back({next, o_index, i_index});
- }
-
- /**
- * @brief Create New IODescription instance for new inputs outputs
- * @param[in] index instance count number
- */
- void createNewAsyncDesc(uint32_t count = 0);
-
- /**
- * @brief Set async input data's information
- * @param[in] index Input index
- * @param[in] buffer Input data's buffer pointer
- * @param[in] length Input data's length
- * @param[in] layout Input data's data format
- */
- void executeAsyncInput(const ir::IOIndex &index, const void *buffer, size_t length,
- ir::Layout layout = ir::Layout::NHWC);
-
- /**
- * @brief Set async output data's information
- * @param[in] index Output index
- * @param[in] buffer Output data's buffer pointer
- * @param[in] length Output data's length
- * @param[in] layout Output data's data format
- */
- void executeAsyncOutput(const ir::IOIndex &index, void *buffer, size_t length,
- ir::Layout layout = ir::Layout::NHWC);
-
- /**
- * @brief Async execution
- * @note It should be called after setting input and output buffer
- */
- void AsyncExecute();
-
- /**
- * @brief Set finish
- */
- void setFinish();
-
- /**
- * @brief Check if input queue is empty
- * @return @c true if queue is empty, otherwise @c false
- */
- bool isEmptyQueue();
-
- /**
- * @brief Wait semaphore to prevent race condition
- */
- void asyncIoDescSemWait();
-
- /**
- * @brief Post semaphore to prevent race condition
- */
- void asyncIoDescSemPost();
-
- /**
- * @brief Inference
- * @note this function provided to the thread for pipelining
- */
- void runInference();
-
- /**
- * @brief Check if stop_wait is true
- * @return @c true if stop_wait is true, otherwise @c false
- */
- bool stopWait(void) const;
-
- /**
- * @brief Set stop_wait to terminate consumer thread
- */
- void sholudStop();
-
private:
- const std::unique_ptr<IExecutor> &primary_executor() const
- {
- return _executors->at(ir::SubgraphIndex{0});
- };
- std::unique_ptr<IExecutor> &primary_executor() { return _executors->at(ir::SubgraphIndex{0}); };
+ const IExecutor *entryExecutor() const { return _executors->entryExecutor(); };
+ IExecutor *entryExecutor() { return _executors->entryExecutor(); };
private:
- const std::shared_ptr<Executors> _executors;
+ const std::shared_ptr<IExecutors> _executors;
IODescription _io_desc;
- std::deque<std::pair<IODescription *, uint32_t>> _async_io_descs;
- sem_t _async_io_descs_sem;
- std::deque<std::vector<void *>> _async_results;
- std::vector<
- std::tuple<std::shared_ptr<onert::exec::Execution>, onert::ir::IOIndex, onert::ir::IOIndex>>
- next_exes;
std::unique_ptr<std::thread> _exec_thread;
bool finished{false};
- bool stop_wait{false};
};
} // namespace exec
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_EXEC_EXECUTORS_H__
-#define __ONERT_EXEC_EXECUTORS_H__
-
-#include "IExecutor.h"
-#include "ir/NNPkg.h"
-
-namespace onert
-{
-namespace exec
-{
-
-/**
- * @brief Class to gather executors
- */
-class Executors
-{
-public:
- Executors(void) = default;
- Executors(std::unique_ptr<ir::ModelEdges> model_edges) { _model_edges = std::move(model_edges); }
- Executors(const Executors &) = delete;
- Executors(Executors &&) = default;
-
- // TODO Use Executor index
- void emplace(ir::SubgraphIndex idx, std::unique_ptr<IExecutor> exec)
- {
- _executors.emplace(idx, std::move(exec));
- }
-
- std::unique_ptr<IExecutor> &at(ir::SubgraphIndex idx) { return _executors.at(idx); }
-
- uint32_t inputSize() const;
-
- uint32_t outputSize() const;
-
- const ir::OperandInfo inputInfo(const ir::IOIndex &index);
-
- const ir::OperandInfo outputInfo(const ir::IOIndex &index);
-
- void execute(const IODescription &desc);
-
-private:
- void executeEntries(const IODescription &desc);
-
-private:
- // TODO Use Executor index
- // Changing index will effect if/while compile and kernel implementation
- std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors;
- // NOTE _model_edges may use different struct type for executor implementation
- std::unique_ptr<ir::ModelEdges> _model_edges;
-};
-
-} // namespace exec
-} // namespace onert
-
-#endif // __ONERT_EXEC_EXECUTORS_H__
template <typename T, typename... Args> void wrap(Args &&... args)
{
- for (auto &function : _functions)
+ for (auto &&function : _functions)
{
function = std::make_unique<T>(std::move(function), args...);
}
{
namespace exec
{
-class IExecutionObserver;
/**
* @brief Struct to define interface of Executor
*/
*
* @return Graph object
*/
- virtual const ir::Graph &graph() = 0;
-
- /**
- * @brief Returns parent graph object
- *
- * @return Graph object
- */
- virtual const ir::Graph &parent_graph() = 0;
+ virtual const ir::Graph &graph() const = 0;
/**
* @brief Set an ordering on operations
const std::vector<backend::IPortableTensor *> &outputs) = 0;
/**
+ * @brief Get input tensor objects
+ *
+ * @return Vector of @c IOTensor
+ */
+ virtual const std::vector<backend::builtin::IOTensor *> &getInputTensors() const = 0;
+
+ /**
* @brief Get output tensor objects
*
* @return Vector of @c IOTensor
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_I_EXECUTORS_H__
+#define __ONERT_EXEC_I_EXECUTORS_H__
+
+#include "IExecutor.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather NN package's executor set
+ */
+class IExecutors
+{
+public:
+ /**
+ * @brief Virtual IExecutors destructor
+ * @note Require derived class destructor
+ */
+ virtual ~IExecutors() = default;
+
+public:
+ /**
+ * @brief Insert executor in executor set
+ * @param[in] model_index Model index
+ * @param[in] subg_index Subgraph index
+ * @param[in] exec Executor to insert
+ *
+ * @todo Use Executor index
+ */
+ virtual void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) = 0;
+
+ /**
+ * @brief Return executor of index
+ * @param[in] model_index Model index
+ * @param[in] subg_index Subgraph index
+ * @return Executor
+ */
+ virtual IExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const = 0;
+
+ /**
+ * @brief Return the entry executor of the set
+ * @return Executor registered for model index 0 and subgraph index 0,
+ * looked up through at()
+ */
+ IExecutor *entryExecutor() const { return at(ir::ModelIndex{0}, ir::SubgraphIndex{0}); }
+
+ /**
+ * @brief Return executor set's number of inputs
+ * @return Number of inputs
+ */
+ virtual uint32_t inputSize() const = 0;
+
+ /**
+ * @brief Return executor set's number of outputs
+ * @return Number of outputs
+ */
+ virtual uint32_t outputSize() const = 0;
+
+ /**
+ * @brief Return NN package input tensor info
+ * @param[in] index Input index
+ * @return Tensor info
+ */
+ virtual const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const = 0;
+
+ /**
+ * @brief Return NN package output tensor info
+ * @param[in] index Output index
+ * @return Tensor info
+ */
+ virtual const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const = 0;
+
+ /**
+ * @brief Execute NN package executor set
+ * @param[in] desc Input and output buffer description
+ */
+ virtual void execute(const IODescription &desc) = 0;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_I_EXECUTORS_H__
void verify(void);
void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
void setLayout(Layout layout) { _layout = layout; }
- void setPartialModel(const std::shared_ptr<Model> &partial_model)
- {
- _partialgraphs = partial_model;
- }
- void
- setTensorName(std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names)
- {
- _tensor_names = tensor_names;
- }
private:
bool checkOperandsForOperation(const Operation &operation);
const Operations &operations() const { return _operations; }
Operations &operations() { return _operations; }
Layout layout() const { return _layout; }
- std::shared_ptr<Model> &partialgraphs() { return _partialgraphs; }
- std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names()
- {
- return _tensor_names;
- }
- std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_begin() const
- {
- return _name_to_input.begin();
- }
- std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_end() const
- {
- return _name_to_input.end();
- }
- std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_begin() const
- {
- return _name_to_output.begin();
- }
- std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_end() const
- {
- return _name_to_output.end();
- }
- void input_sort() { _inputs.sort(); }
- void output_sort() { _outputs.sort(); }
// Topological sort
public:
std::unordered_map<std::string, IOIndex> _name_to_output;
// TFLite and circle's default layout is NHWC;
Layout _layout{Layout::NHWC};
-
- // model for partial graphs
- std::shared_ptr<ir::Model> _partialgraphs;
- std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
};
} // namespace ir
using IOIndex = ::onert::util::Index<uint32_t, IOIndexTag>;
struct SubgraphIndexTag;
-using SubgraphIndex = ::onert::util::Index<uint32_t, SubgraphIndexTag>;
+using SubgraphIndex = ::onert::util::Index<uint16_t, SubgraphIndexTag>;
struct ModelIndexTag;
-using ModelIndex = ::onert::util::Index<uint32_t, ModelIndexTag>;
+using ModelIndex = ::onert::util::Index<uint16_t, ModelIndexTag>;
template <typename IndexType>
std::ostream &_index_print_impl(std::ostream &o, const std::string &prefix, IndexType index)
#include <unordered_set>
#include <vector>
+#include "ir/Graph.h"
#include "ir/Index.h"
#include "ir/Model.h"
~NNPkg() = default;
NNPkg(std::shared_ptr<Model> model) { _models[ModelIndex{0}] = model; }
- std::shared_ptr<Model> primary_model() { return _models.at(onert::ir::ModelIndex{0}); }
+ std::shared_ptr<Model> primary_model() const { return _models.at(onert::ir::ModelIndex{0}); }
/**
* @brief Put model at index
*/
const ModelEdges &model_edges() { return _edges; }
+  /**
+   * @brief Verify the edge information of this NNPkg
+   *
+   * @throw std::runtime_error if the `to` endpoint of any edge is also listed
+   *        as a package input
+   */
+  void verify(void)
+  {
+    // Verify edges information
+    //
+    // Only duplicates of nnpkg output and Edge `from` are possible.
+    // | Whether duplicates are possible    | Edge `to` | Edge `from` |
+    // | nnpkg input  (input of subgraph)   | X (*1)    | X (*2)      |
+    // | nnpkg output (output of subgraph)  | X (*2)    | O           |
+    // *1. The subjects who determine values of each buffer are different.
+    //    - nnpkg input : user input
+    //    - Edge `to`   : output of another subgraph
+    // *2. `IOIndex` of inputs and outputs of subgraph is distinct.
+    //
+    const auto &pkg_inputs = _edges.pkg_inputs;
+    const auto is_pkg_input = [&pkg_inputs](const auto &io_desc) {
+      return std::find(pkg_inputs.begin(), pkg_inputs.end(), io_desc) != pkg_inputs.end();
+    };
+
+    for (const auto &edge : _edges.edges)
+    {
+      if (!is_pkg_input(edge.to))
+        continue;
+
+      throw std::runtime_error{
+        "Invalid edge information. NNPkg inputs and Edge `to` cannot be duplicated"};
+    }
+  }
+
+ // TODO Find better way to handle single model NNPackage and multi model NNPackage on inputSize(),
+ // outputSize(), inputInfo(), outputInfo()
+
+  /**
+   * @brief Get the number of inputs of the package
+   * @return Input count of the primary model's primary subgraph when the package
+   *         holds exactly one model, otherwise the number of package inputs
+   *         recorded in the edge information
+   */
+  uint32_t inputSize() const
+  {
+    if (_models.size() != 1)
+      return _edges.pkg_inputs.size();
+
+    return primary_model()->primary_subgraph()->getInputs().size();
+  }
+
+  /**
+   * @brief Get the number of outputs of the package
+   * @return Output count of the primary model's primary subgraph when the package
+   *         holds exactly one model, otherwise the number of package outputs
+   *         recorded in the edge information
+   */
+  uint32_t outputSize() const
+  {
+    if (_models.size() != 1)
+      return _edges.pkg_outputs.size();
+
+    return primary_model()->primary_subgraph()->getOutputs().size();
+  }
+
+  /**
+   * @brief Get operand info of a package input
+   * @param[in] index Position of the input
+   * @return Operand info of the matching input operand
+   *
+   * @note With a single model, @c index addresses the inputs of the primary
+   *       subgraph directly. Otherwise it is resolved through input(index), and
+   *       the operand is looked up in the primary subgraph of the model named by
+   *       that descriptor.
+   */
+  OperandInfo &inputInfo(uint32_t index) const
+  {
+    if (_models.size() != 1)
+    {
+      auto const &io_desc = input(index);
+      auto const subg = model(std::get<ModelIndex>(io_desc))->primary_subgraph();
+      auto const operand = subg->getInputs().at(std::get<IOIndex>(io_desc).value());
+      return subg->operands().at(operand).info();
+    }
+
+    auto const subg = primary_model()->primary_subgraph();
+    auto const operand = subg->getInputs().at(index);
+    return subg->operands().at(operand).info();
+  }
+
+  /**
+   * @brief Get operand info of a package output
+   * @param[in] index Position of the output
+   * @return Operand info of the matching output operand
+   *
+   * @note With a single model, @c index addresses the outputs of the primary
+   *       subgraph directly. Otherwise it is resolved through output(index), and
+   *       the operand is looked up in the primary subgraph of the model named by
+   *       that descriptor.
+   */
+  OperandInfo &outputInfo(uint32_t index) const
+  {
+    if (_models.size() != 1)
+    {
+      auto const &io_desc = output(index);
+      auto const subg = model(std::get<ModelIndex>(io_desc))->primary_subgraph();
+      auto const operand = subg->getOutputs().at(std::get<IOIndex>(io_desc).value());
+      return subg->operands().at(operand).info();
+    }
+
+    auto const subg = primary_model()->primary_subgraph();
+    auto const operand = subg->getOutputs().at(index);
+    return subg->operands().at(operand).info();
+  }
+
// TODO: Add iterate() or getter for edges
private:
} // namespace ir
} // namespace onert
+namespace std
+{
+
+/**
+ * @brief Hash support that lets onert::ir::IODesc be used as a key of
+ *        unordered containers
+ */
+template <> struct hash<onert::ir::IODesc>
+{
+  /**
+   * @brief Combine the three index values of an IODesc into one hash value
+   * @param[in] iodesc Descriptor to hash
+   * @return Element 0 shifted left by 24 bits, OR-ed with element 1 shifted left
+   *         by 16 bits, OR-ed with element 2
+   */
+  size_t operator()(const ::onert::ir::IODesc &iodesc) const noexcept
+  {
+    // Widen to size_t before shifting. Index types narrower than int (ModelIndex
+    // and SubgraphIndex wrap uint16_t) would otherwise be promoted to signed int,
+    // and shifting such a value left by 24 bits can overflow int.
+    const auto hi = static_cast<size_t>(std::get<0>(iodesc).value());
+    const auto mid = static_cast<size_t>(std::get<1>(iodesc).value());
+    const auto lo = static_cast<size_t>(std::get<2>(iodesc).value());
+    return (hi << 24) | (mid << 16) | lo;
+  }
+};
+
+} // namespace std
+
#endif // __ONERT_IR_NNPKG_H__
#include <initializer_list>
#include <vector>
-#include <algorithm>
#include "ir/Index.h"
void append(const OperandIndex &index) { _vec.emplace_back(index); }
void append(const OperandIndexSequence &l) { _vec.insert(_vec.end(), l.begin(), l.end()); }
- void sort()
- {
- std::sort(_vec.begin(), _vec.end(),
- [](const auto &lhs, const auto &rhs) { return lhs.value() < rhs.value(); });
- }
-
public:
uint32_t size() const { return static_cast<uint32_t>(_vec.size()); }
const OperandIndex &at(IOIndex set_index) const { return _vec.at(set_index.value()); }
struct Shape
{
public:
- static int32_t const UNSPECIFIED_DIM;
- static int32_t const MAX_RANK;
+ static int32_t const kUnspecifiedDim;
+ static int32_t const kMaxRank;
Shape() = default;
*/
bool hasUnspecifiedDims() const
{
- return (std::find(_dimensions.begin(), _dimensions.end(), UNSPECIFIED_DIM) !=
+ return (std::find(_dimensions.begin(), _dimensions.end(), kUnspecifiedDim) !=
_dimensions.end());
}
CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;trix;bcq") // FIXME Remove bcq
CONFIG(OP_BACKEND_ALLOPS , std::string , "")
CONFIG(OP_BACKEND_MAP , std::string , "")
-CONFIG(DISABLE_COMPILE , bool , "0")
CONFIG(ONERT_LOG_ENABLE , bool , "0")
CONFIG(CPU_MEMORY_PLANNER , std::string , "WIC")
CONFIG(EXECUTOR , std::string , "Linear")
*/
T value() const { return _index; }
+ /**
+ * @brief Return max index value
+ *
+ * @return Maximum valid index value
+ */
+ static T max() { return UNDEFINED - 1; }
+
private:
T _index;
};
// This implementation is a workaround in case of adding operands while iteration
std::list<Index> l;
- for (auto &e : _objects)
+ for (const auto &e : _objects)
{
l.push_back(e.first);
}
- for (auto &index : l)
+ for (const auto &index : l)
{
fn(index, *_objects[index]);
}
template <size_t from, size_t to, typename Enable = void> struct ForEachDimension
{
- template <typename L, typename... Args>
+ template <typename L>
static void unroll(const onert::ir::Shape &shape, onert::ir::Coordinates &coords,
- L &&lambda_function, Args &&... args)
+ L lambda_function)
{
static_assert(from < to, "from must not be less than to");
assert(static_cast<int>(to) <= shape.rank());
for (auto v = 0; v < d; v++)
{
coords.set(from, v);
- ForEachDimension<from + 1, to>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<from + 1, to>::unroll(shape, coords, lambda_function);
}
}
};
// Recursion terminator, selected via enable_if when from == to: no dimension is iterated here;
// the callback is invoked once with the coordinates passed in.
template <size_t from, size_t to>
struct ForEachDimension<from, to, typename std::enable_if<from == to>::type>
{
- template <typename L, typename... Args>
+ template <typename L>
static void unroll(const onert::ir::Shape &shape, onert::ir::Coordinates &coords,
- L &&lambda_function, Args &&... args)
+ L lambda_function)
{
// shape is only read by the assert below; UNUSED_RELEASE marks it as used
// (presumably to silence unused-parameter warnings when asserts are compiled out).
UNUSED_RELEASE(shape);
assert(static_cast<int>(to) <= shape.rank());
- lambda_function(coords, std::forward<Args>(args)...);
+ lambda_function(coords);
}
};
-template <typename L, typename... Args>
-inline void ShapeLoop(const onert::ir::Shape &shape, L &&lambda_function, Args &&... args)
+template <typename L> inline void ShapeLoop(const onert::ir::Shape &shape, L lambda_function)
{
assert(shape.rank() > 0);
for (auto i = 0; i < shape.rank(); ++i)
{
case 0:
coords.set(0, 0);
- ForEachDimension<0, 0>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 0>::unroll(shape, coords, lambda_function);
break;
case 1:
- ForEachDimension<0, 1>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 1>::unroll(shape, coords, lambda_function);
break;
case 2:
- ForEachDimension<0, 2>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 2>::unroll(shape, coords, lambda_function);
break;
case 3:
- ForEachDimension<0, 3>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 3>::unroll(shape, coords, lambda_function);
break;
case 4:
- ForEachDimension<0, 4>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 4>::unroll(shape, coords, lambda_function);
break;
case 5:
- ForEachDimension<0, 5>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 5>::unroll(shape, coords, lambda_function);
break;
case 6:
- ForEachDimension<0, 6>::unroll(shape, coords, std::forward<L>(lambda_function),
- std::forward<Args>(args)...);
+ ForEachDimension<0, 6>::unroll(shape, coords, lambda_function);
break;
default:
assert(false && "ShapeLoop, 1 <= Shape'rank <= 6");
void DynamicMemoryManager::deallocate(void)
{
- for (auto &mem_alloc : _mem_alloc_map)
+ for (auto &&mem_alloc : _mem_alloc_map)
{
// Release memory buffer of mem_alloc
mem_alloc.second->release();
{
// Find the right position for claiming
uint32_t next_offset = 0;
- for (auto &mem_claim : _claim_table)
+ for (const auto &mem_claim : _claim_table)
{
auto claimed_base_offset = mem_claim.first;
auto claimed_size = _mem_plans[mem_claim.second].size;
{
_nonconst_mgr->allocate();
- for (auto &pair : _tensors->native_tensors())
+ for (auto &&pair : _tensors->native_tensors())
{
const auto &ind = pair.first;
auto tensor = pair.second.get();
const_cast<ir::Graph *>(graph())->operands().iterate(
[&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
- for (auto &it : ret)
+ for (auto &&it : ret)
{
auto &fn_seq = it.second;
fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
public:
void setTensor(IPortableTensor *tensor);
void setUserTensor(uint8_t *buffer, size_t size);
- ir::OperandInfo orig_info() const { return _orig_info; }
+ const ir::OperandInfo &orig_info() const { return _orig_info; }
ir::Layout orig_layout() const { return _orig_layout; }
public:
const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::shared_ptr<ExternalContext> &external_context)
: basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager},
- _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _external_context{
- external_context}
+ _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _model_index{},
+ _external_context{external_context}
{
UNUSED_RELEASE(_graph);
UNUSED_RELEASE(_tensor_registries);
input_tensors.erase(input_tensors.begin());
auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>(
cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executors,
- _external_context);
+ _model_index, _external_context);
_return_fn = std::move(fn);
}
// WhileLayer just sets Executors instead of cond and body executor to avoid complexity of
// creating executor recursively
auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>(
- input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors,
+ input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors, _model_index,
_dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
_return_fn = std::move(fn);
#include "../../compiler/TensorRegistries.h"
#include "backend/basic/KernelGeneratorBase.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
#include "ir/Graph.h"
namespace onert
{
_tensor_registries = tensor_registries;
}
- void setExecutors(const std::shared_ptr<exec::Executors> &executors)
+ void setExecutors(const std::shared_ptr<exec::IExecutors> &executors)
{
// FIXME Using shared_ptr's raw pointer!
_executors = executors.get();
}
+ void setModelIndex(const ir::ModelIndex &index) { _model_index = index; }
+
std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
private:
DynamicTensorManager *_dyn_tensor_manager;
std::shared_ptr<TensorRegistry> _tensor_reg;
compiler::TensorRegistries _tensor_registries;
- exec::Executors *_executors;
+ exec::IExecutors *_executors;
+ ir::ModelIndex _model_index;
const std::shared_ptr<ExternalContext> _external_context;
};
const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::Executors *executors,
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
_then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, _executors{executors},
- _external_context{external_context}
+ _model_index{model_index}, _external_context{external_context}
{
// At this point, executors may not have executors of then subg and else subg
}
if (cond_result)
{
VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
- subg_exec = _executors->at(_then_subg_index).get();
+ subg_exec = _executors->at(_model_index, _then_subg_index);
}
else
{
VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
- subg_exec = _executors->at(_else_subg_index).get();
+ subg_exec = _executors->at(_model_index, _else_subg_index);
}
subg_exec->execute(_input_tensors, _output_tensors);
#define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
#include <backend/IPortableTensor.h>
-#include <exec/Executors.h>
+#include <exec/IExecutors.h>
#include "../ExternalContext.h"
namespace onert
const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::Executors *executors, const std::shared_ptr<ExternalContext> &external_context);
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
+ const std::shared_ptr<ExternalContext> &external_context);
public:
void run() override;
const std::vector<backend::IPortableTensor *> _output_tensors;
const ir::SubgraphIndex _then_subg_index;
const ir::SubgraphIndex _else_subg_index;
- exec::Executors *_executors;
+ exec::IExecutors *_executors;
+ ir::ModelIndex _model_index;
const std::shared_ptr<ExternalContext> _external_context;
};
src_offsets_it->resize(0);
dst_offsets_it->resize(0);
if (underlying_type(src->data_type()) != underlying_type(dst->data_type()))
- throw std::runtime_error("data type does not match");
+ continue;
const auto permute_type = [&]() -> PermuteType {
if (src->getShape().rank() == 4 && src->layout() == ir::Layout::NHWC &&
dst->layout() == ir::Layout::NCHW)
return PermuteType::COPY;
}
}();
+
+ // TODO Support different types
auto fn = [&](backend::ITensor &src_tensor) {
dst->access([&](backend::ITensor &dst_tensor) {
// NOTE The buffer of both tensor can be nullptr in this step
// 1. The tasks for multithreading were created
// 2. The tasks' size > 1
// 3. Both tensors are not dynamic
+ // 4. Data types of both tensors are different
if (_tasks_map.find(src) == _tasks_map.end() || _tasks_map.at(src).size() == 1 ||
- src->is_dynamic() || dst->is_dynamic())
+ src->is_dynamic() || dst->is_dynamic() ||
+ underlying_type(src->data_type()) != underlying_type(dst->data_type()))
{
permute(src, dst, src->getShape().rank(), src_offsets, dst_offsets);
}
WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index,
- const ir::SubgraphIndex &body_subg_index, exec::Executors *executors,
+ const ir::SubgraphIndex &body_subg_index, exec::IExecutors *executors,
+ const ir::ModelIndex &model_index,
basic::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
_input_tensors{input_tensors}, _output_tensors{output_tensors}, _executors{executors},
- _dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
+ _model_index{model_index}, _dyn_memory_manager{dyn_memory_manager}, _external_context{
+ external_context}
{
// At this point, executors may not have executors of cond subg and body subg
}
// // Run cond subg
// If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" ->
// "_dst_tensors"
- auto cond_exec = _executors->at(_cond_subg_index).get();
- auto body_exec = _executors->at(_body_subg_index).get();
+ auto cond_exec = _executors->at(_model_index, _cond_subg_index);
+ auto body_exec = _executors->at(_model_index, _body_subg_index);
// Need a temp tensor to hold the cond subgraph output
assert(cond_exec->getOutputTensors().size() == 1);
#define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
#include <backend/IPortableTensor.h>
-#include <exec/Executors.h>
+#include <exec/IExecutors.h>
#include <exec/IFunction.h>
#include <ir/OperandIndexSequence.h>
#include <ir/Graph.h>
WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
- exec::Executors *executors, basic::DynamicMemoryManager *dyn_memory_manager,
+ exec::IExecutors *executors, const ir::ModelIndex &model_index,
+ basic::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context);
public:
const ir::SubgraphIndex _body_subg_index;
const std::vector<backend::IPortableTensor *> _input_tensors;
const std::vector<backend::IPortableTensor *> _output_tensors;
- exec::Executors *_executors;
+ exec::IExecutors *_executors;
+ const ir::ModelIndex _model_index;
basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
const std::shared_ptr<ExternalContext> _external_context;
};
#include "pass/OddOutputPass.h"
#include "pass/PassRunner.h"
#include "pass/UnusedOperandEliminationPass.h"
-#include "../backend/builtin/Config.h"
#include "../dumper/dot/DotDumper.h"
-#include "../interp/InterpExecutor.h"
-#include "../ir/OperationCloner.h"
+#include "../exec/SingleModelExecutors.h"
#include "../ir/OperationDumper.h"
#include "../ir/verifier/Verifier.h"
#include "compiler/StaticShapeInferer.h"
-#include "util/ConfigSource.h"
-#include "util/logging.h"
-#include <misc/polymorphic_downcast.h>
#include <misc/string_helpers.h>
-#include <json/json.h>
-
-// TODO Remove using fstream header
-#include <fstream>
-
-namespace
-{
-
-using namespace onert;
-
-std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to_backend)
-{
- std::unordered_map<ir::OpCode, std::string>::iterator it;
- std::string opbackends;
-
- for (it = opcode_to_backend.begin(); it != opcode_to_backend.end(); ++it)
- {
- if (!opbackends.empty())
- opbackends = opbackends + ", ";
-
- auto opcode = it->first;
- const std::string opname = ir::toString(opcode);
- opbackends += opname + "=" + it->second;
- }
- return opbackends;
-}
-
-void verboseOptions(compiler::CompilerOptions &options)
-{
- VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
- VERBOSE(Compiler) << "backend_list : "
- << nnfw::misc::join(options.backend_list.begin(), options.backend_list.end(),
- "/")
- << std::endl;
- VERBOSE(Compiler) << "trace_filepath : " << options.trace_filepath << std::endl;
- VERBOSE(Compiler) << "graph_dump_level : " << options.graph_dump_level << std::endl;
- VERBOSE(Compiler) << "executor : " << options.executor << std::endl;
- VERBOSE(Compiler) << "manual backend_for_all : "
- << options.manual_scheduler_options.backend_for_all << std::endl;
- VERBOSE(Compiler) << "manual_scheduler_options : "
- << getOpBackends(options.manual_scheduler_options.opcode_to_backend)
- << std::endl;
- VERBOSE(Compiler) << "he_scheduler : " << options.he_scheduler << std::endl;
- VERBOSE(Compiler) << "he_profiling_mode : " << options.he_profiling_mode << std::endl;
- VERBOSE(Compiler) << "disable_compile : " << options.disable_compile << std::endl;
- VERBOSE(Compiler) << "fp16_enable : " << options.fp16_enable << std::endl
- << std::noboolalpha;
-}
-
-std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>>
-createStaticShapeInferers(
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &lowered_subgs)
-{
- // Allocate StaticShapeInferer per each subgraph
- std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>> inferers;
- for (auto &pair : lowered_subgs)
- {
- const auto &subg_index = pair.first;
- auto &lowered_subg = pair.second;
- inferers[subg_index] = std::make_unique<compiler::StaticShapeInferer>(lowered_subg.get());
- }
-
- // Append observers in all StaticShapeInferers
- for (auto &pair : lowered_subgs)
- {
- const auto &subg_index = pair.first;
- auto &lowered_subg = pair.second;
-
- // TODO: Change this iteration for all to controlflow iteration
- lowered_subg->graph().operations().iterate([&](const ir::OperationIndex &,
- const ir::Operation &op) {
- // A Function to append child inferers. These make it possible for a StaticShapeInferer to
- // call StaticShapeInferes of child subgraphs recursively
- auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) {
- auto *child_inferer = inferers.at(child_subg_idx).get();
- inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer);
- };
-
- // A Function to appaend subg input observers. This makes it possible for a StaticShapeInferer
- // to update inputs of child subgraphs
- auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
- std::vector<ir::Operand *> child_subg_inputs;
- auto &child_subg = lowered_subgs.at(child_subg_idx)->graph();
- for (const auto &input_idx : child_subg.getInputs())
- {
- auto operand_ptr = child_subg.operands().getRawPtr(input_idx);
- child_subg_inputs.emplace_back(operand_ptr);
- }
- inferers.at(subg_index)
- ->appendSubgInputObserver(child_subg_idx,
- std::make_unique<compiler::OperandObserver>(child_subg_inputs));
- };
-
- // A Function to set controlflow output observers. This makes it possible for a
- // StaticShapeInferer to update outputs of parent controlflow opeerations
- auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
- std::vector<ir::Operand *> cf_outputs;
- auto &subg = lowered_subg->graph();
- for (const auto &output_idx : op.getOutputs())
- {
- auto operand_ptr = subg.operands().getRawPtr(output_idx);
- cf_outputs.emplace_back(operand_ptr);
- }
- inferers.at(child_subg_idx)
- ->setControlflowOutputObserver(std::make_unique<compiler::OperandObserver>(cf_outputs));
- };
-
- // Append Observers in a StaticShapeInferer
- if (op.opcode() == ir::OpCode::If)
- {
- const auto &if_op = nnfw::misc::polymorphic_downcast<const ir::operation::If &>(op);
-
- appendChildInferer(if_op.param().then_subg_index);
- appendChildInferer(if_op.param().else_subg_index);
-
- appendSubgraphInputObserver(if_op.param().then_subg_index);
- appendSubgraphInputObserver(if_op.param().else_subg_index);
-
- setControlFlowOutputObserver(if_op.param().then_subg_index);
- }
- else if (op.opcode() == ir::OpCode::While)
- {
- const auto &while_op = nnfw::misc::polymorphic_downcast<const ir::operation::While &>(op);
-
- appendChildInferer(while_op.param().cond_subg_index);
- appendChildInferer(while_op.param().body_subg_index);
-
- appendSubgraphInputObserver(while_op.param().cond_subg_index);
- appendSubgraphInputObserver(while_op.param().body_subg_index);
-
- setControlFlowOutputObserver(while_op.param().body_subg_index);
- }
- });
- }
-
- return inferers;
-}
-
-} // namespace
namespace onert
{
-
namespace compiler
{
-void ManualSchedulerOptions::setBackendMap(const std::string &str)
-{
- // TODO Support multiple subgraphs for manual scheduling
- auto key_val_list = nnfw::misc::split(str, ';');
- for (const auto &key_val_str : key_val_list)
- {
- if (key_val_str.empty())
- {
- continue;
- }
-
- auto key_val = nnfw::misc::split(key_val_str, '=');
- const auto &key_str = key_val.at(0);
- const auto &val = key_val.at(1);
- auto key = static_cast<uint32_t>(std::stoi(key_str));
- this->index_to_backend.emplace(ir::OperationIndex{key}, val);
- }
-}
-
-std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig()
-{
- auto o = std::make_unique<CompilerOptions>();
- o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
- o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
- o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
- o->executor = util::getConfigString(util::config::EXECUTOR);
- o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
- o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
- o->disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
- o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
- {
- // Backend for all
- auto &ms_options = o->manual_scheduler_options;
-
- // Default value for op_backend_all is first element in the backend list
- ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
-
-// Opcode to Backend
-#define OP(OpName) \
- { \
- const auto &backend_str = util::getConfigString(util::config::OP_BACKEND_##OpName); \
- if (!backend_str.empty()) \
- { \
- ms_options.opcode_to_backend[ir::OpCode::OpName] = backend_str; \
- } \
- }
-#include "ir/Operations.lst"
-#undef OP
-
- // Index to Backend
- auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
- ms_options.setBackendMap(map_str);
- }
- return o;
-}
Compiler::Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt)
- : _nnpkg{std::make_shared<ir::NNPkg>(model)}, _state{State::CREATED}, _voptions{&copt}
+ : _model{model}, _options{&copt}
{
// DO NOTHING
}
Compiler::Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
std::vector<std::unique_ptr<CompilerOptions>> &copts)
- : _nnpkg{nnpkg}, _state{State::CREATED}, _voptions{}
+ : _model{nnpkg->primary_model()}, _options{copts[0].get()}
{
- for (uint32_t i = 0; i < copts.size(); i++)
- {
- _voptions.push_back(copts[i].get());
- }
-}
-
-void Compiler::enableToFp16()
-{
- for (auto options : _voptions)
- options->fp16_enable = true;
-}
-
-void Compiler::checkProfilerConditions()
-{
- if (_nnpkg->model_count() != 1)
- throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet");
-
- auto &options = *_voptions[0];
-
- if (options.he_scheduler)
- throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
-
- if (options.executor != "Dataflow")
- throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
-}
-
-bool Compiler::buildPartialGraph(uint32_t num_graphs)
-{
- // Use 1st model and options only on partial graph (pipeline) compile
- assert(_nnpkg->model_count() == 1);
- assert(_voptions.size() == 1);
-
- auto model = _nnpkg->primary_model();
- auto &options = *_voptions[0];
-
- if (model->subgraphs_count() > 1)
- return false;
-
- auto partialgraphs = std::make_shared<ir::Model>();
-
- for (uint32_t idx = 0; idx < num_graphs; idx++)
- {
- auto partialgraph = std::make_unique<ir::Graph>();
- partialgraphs->push(ir::SubgraphIndex{idx}, std::move(partialgraph));
- }
- model->primary_subgraph()->setPartialModel(partialgraphs);
-
- auto partial_graph = primary_subgraph()->partialgraphs();
-
- primary_subgraph()->operands().iterate(
- [&](const ir::OperandIndex &operand_index, const ir::Operand &operand) {
- auto use_operations = operand.getUses();
-
- for (auto use_operation : use_operations)
- {
- auto graph_index = options.partial_graph_options.index_to_graph.find(use_operation);
- if (graph_index == options.partial_graph_options.index_to_graph.end())
- {
- throw std::runtime_error("Invalid Partition Map");
- }
- auto partition = partial_graph->at(graph_index->second);
-
- if (partition->operands().exist(operand_index))
- {
- continue;
- }
-
- auto new_operand = std::make_unique<ir::Operand>(operand);
- new_operand->clearDefUse();
- auto new_operand_ind = partition->addOperand(operand_index, std::move(new_operand));
- UNUSED_RELEASE(new_operand_ind);
- assert(new_operand_ind == operand_index);
- }
- });
-
- primary_subgraph()->operations().iterate(
- [&](const ir::OperationIndex &operation_index, const ir::Operation &operation) {
- auto graph_index = options.partial_graph_options.index_to_graph.find(operation_index);
- if (graph_index == options.partial_graph_options.index_to_graph.end())
- {
- throw std::runtime_error("Invalid Partition Map");
- }
- auto partition = partial_graph->at(graph_index->second);
-
- auto operand_io = (operation.getInputs() + operation.getOutputs()) | ir::Remove::DUPLICATED |
- ir::Remove::UNDEFINED;
- for (auto operand_index : operand_io)
- {
- if (partition->operands().exist(operand_index))
- continue;
-
- const auto &operand = primary_subgraph()->operands().at(operand_index);
-
- auto new_operand = std::make_unique<ir::Operand>(operand);
- new_operand->clearDefUse();
-
- auto new_operand_index = partition->addOperand(operand_index, std::move(new_operand));
- UNUSED_RELEASE(new_operand_index);
- assert(new_operand_index == operand_index);
- }
-
- auto new_operation_index = partition->addOperation(operation_index, clone(operation));
- UNUSED_RELEASE(new_operation_index);
- assert(new_operation_index == operation_index);
- });
-
- for (uint32_t idx = 0; idx < partial_graph->subgraphs_count(); idx++)
- {
- auto partition = partial_graph->at(ir::SubgraphIndex{idx});
-
- partition->operands().iterate([&](const ir::OperandIndex &operand_index,
- const ir::Operand &operand) {
- if (primary_subgraph()->getInputs().contains(operand_index) ||
- (!operand.getDef().valid() && !operand.isConstant()))
- {
- partition->addInput(operand_index, primary_subgraph()->tensor_names()->at(operand_index));
- }
- if (primary_subgraph()->getOutputs().contains(operand_index) || operand.getUses().size() == 0)
- {
- partition->addOutput(operand_index, primary_subgraph()->tensor_names()->at(operand_index));
- }
-
- if (primary_subgraph()->operands().at(operand_index).getUses().size() > 1 &&
- !primary_subgraph()->operands().at(operand_index).isConstant() &&
- !partition->getInputs().contains(operand_index))
- {
- auto use_operations = primary_subgraph()->operands().at(operand_index).getUses();
- auto iter = use_operations.begin();
- ir::SubgraphIndex graph_index =
- options.partial_graph_options.index_to_graph.find(*iter++)->second;
- while (iter != use_operations.end())
- {
- if (graph_index != options.partial_graph_options.index_to_graph.find(*iter)->second &&
- !partition->getOutputs().contains(operand_index))
- {
- partition->addOutput(operand_index,
- primary_subgraph()->tensor_names()->at(operand_index));
- }
- iter++;
- }
- }
- });
-
- partition->verify();
-
- bool same = true;
- if (partition->getInputs().size() == primary_subgraph()->getInputs().size())
- {
- for (auto iter = partition->getInputs().begin(); iter != partition->getInputs().end(); ++iter)
- {
- if (!primary_subgraph()->getInputs().contains(*iter))
- {
- same = false;
- break;
- }
- }
- if (same == true)
- {
- partition->getInputs() = primary_subgraph()->getInputs();
- }
- else
- {
- partition->input_sort();
- }
- }
-
- same = true;
- if (partition->getOutputs().size() == primary_subgraph()->getOutputs().size())
- {
- for (auto iter = partition->getOutputs().begin(); iter != partition->getOutputs().end();
- ++iter)
- {
- if (!primary_subgraph()->getOutputs().contains(*iter))
- {
- same = false;
- break;
- }
- }
- if (same == true)
- {
- partition->getOutputs() = primary_subgraph()->getOutputs();
- }
- else
- {
- partition->output_sort();
- }
- }
- }
- return true;
+ // Use for single model only
+ assert(nnpkg->model_count() == 1);
}
std::shared_ptr<CompilerArtifact> Compiler::compile(void)
{
- for (auto options : _voptions)
- {
- // Set control flow backend for control flow operators
- auto &builtin_id = backend::builtin::Config::ID;
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
-
- // FIXME This is a workaround for bcq operations, should remove it
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
-
- // FIXME This is a workaround for bulk operations, should remove it
- options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
-
- verboseOptions(*options);
- }
-
- // NYI: allow one model compilation
- auto const model_count = _nnpkg->model_count();
- if (model_count != _voptions.size())
- throw std::runtime_error{"Model count and option vector size mismatch"};
-
- for (uint32_t i = 0; i < model_count; i++)
- {
- _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
- // Mandatory passes
- pass::PassRunner{}
- .append(std::make_unique<pass::ConstantOutputPass>(subg))
- .append(std::make_unique<pass::OddOutputPass>(subg))
- .run();
-
- // Optimizations
- pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
- });
- }
-
/***************************************************
* Prepare compilation phase
***************************************************/
- // Compilable check
- // TODO: Support hybrid execution -
- // execution between interpreter and compiled executor (including control flow)
- if (_voptions[0]->disable_compile)
- {
- if (model_count > 1)
- throw std::runtime_error{"NYI: Disable compilation for multi model is not supported yet"};
+ if (!_options)
+ throw std::runtime_error{"Empty compile option"};
- auto executors = std::make_shared<exec::Executors>();
+ // Mode check
+ // TODO handle option for each model
+ if (_options->he_profiling_mode)
+ {
+ if (!_options->he_scheduler)
+ throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
- _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
- });
- _state = State::COMPILED;
- return std::make_shared<CompilerArtifact>(executors, nullptr);
+ if (_options->executor != "Dataflow")
+ throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
}
- // Mode check
- // TODO handle option for each model
- if (_voptions[0]->he_profiling_mode)
- checkProfilerConditions();
+ _options->forceInternalOptions();
+ _options->verboseOptions();
+
+ _model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
+
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+ });
/***************************************************
* Backend independent analysis & optimization phase
***************************************************/
// TODO Handle dump level for each model
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level);
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options->graph_dump_level);
onert::dumper::dot::DotDumper dot_dumper(dump_level);
// Tracing context
auto tracing_ctx = std::make_unique<util::TracingCtx>();
- // Model edge context
- std::unique_ptr<ir::ModelEdges> model_edges = nullptr;
-
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
-
- if (model_count == 1)
{
- _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- dot_dumper.dump(subg, nnfw::misc::str("before_lower_subg-", index.value()));
+ _model->iterate([&](const ir::SubgraphIndex &subg_index, ir::Graph &subg) {
// Lower: Assign backend
- lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, *_voptions[0]);
+ lowered_subgs[subg_index] = std::make_unique<compiler::LoweredGraph>(subg, *_options);
// Set tracing_ctx for copied graph
- tracing_ctx->setSubgraphIndex(&(lowered_subgs[index]->graph()), index.value());
+ if (tracing_ctx != nullptr)
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[subg_index]->graph()), subg_index.value());
});
}
- else
- {
- // TODO Support tracing_ctx for multiple model
- tracing_ctx = nullptr;
- // Copy model edge context
- model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges());
+ _model.reset();
- for (uint32_t i = 0; i < model_count; i++)
- {
- auto model = _nnpkg->model(ir::ModelIndex{i});
- if (model->subgraphs_count() != 1)
- throw std::runtime_error{"NYI: Lowering subgraphs for multiple model is not supported yet"};
- auto subg = model->primary_subgraph();
- dot_dumper.dump(*subg, nnfw::misc::str("before_lower_model-", i));
-
- // For multimodel, model index is used for lowered graph index in lowered graph map
- // and index type is SubgraphIndex
- // TODO Find better way to represent lowered graph index for multimodel's subgraph
- lowered_subgs[ir::SubgraphIndex{i}] =
- std::make_unique<compiler::LoweredGraph>(*model->primary_subgraph(), *_voptions[i]);
- }
- }
-
- _nnpkg.reset();
-
- for (auto &pair : lowered_subgs)
+ for (const auto &pair : lowered_subgs)
{
const auto &subg_index = pair.first;
- auto &lowered_subg = pair.second;
- dot_dumper.dump(*lowered_subg, "after_lower_subg-" + std::to_string(subg_index.value()));
+ const auto &lowered_subg = pair.second;
+ dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_subg-", subg_index.value()));
}
// Shape inference.
// Run the StaticShapeInfer of primary subg. All child StaticShapeInferers are called
// recursively
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
- createStaticShapeInferers(lowered_subgs);
+ StaticShapeInferer::createStaticShapeInferers(lowered_subgs);
- if (model_count == 1)
- {
- const auto primary_subg_idx = ir::SubgraphIndex{0};
- inferers.at(primary_subg_idx)->infer();
+ const auto primary_subg_idx = ir::SubgraphIndex{0};
+ inferers.at(primary_subg_idx)->infer();
- for (const auto &pair : inferers)
- {
- const auto inferer = pair.second.get();
- inferer->dump();
- }
- }
- else
+ for (const auto &pair_inferer : inferers)
{
- // Assume multi model has only one subgraph on each model
- for (const auto &pair : inferers)
- {
- const auto inferer = pair.second.get();
- inferer->infer();
- inferer->dump();
- }
+ const auto inferer = pair_inferer.second.get();
+ inferer->dump();
}
}
// - Check parameter value validation which valid value is depend on input tensor shape
// - Output tensor shape validation check is needless because
// static/dynamic shape inferer will make valid output shape
- for (auto &pair : lowered_subgs)
+ for (const auto &pair : lowered_subgs)
{
auto &lowered_subg = pair.second;
compiler::ShapeValidator{lowered_subg->graph()}();
/*************************************************************
* Backend independent analysis & optimization phase finished
*************************************************************/
- auto executors = std::make_shared<exec::Executors>(std::move(model_edges));
- for (auto &pair : lowered_subgs)
+ auto executors = std::make_shared<exec::SingleModelExecutors>();
+ for (auto &&pair : lowered_subgs)
{
- const auto &subg_index = pair.first;
+ auto const model_index = ir::ModelIndex{0};
+ auto const subg_index = pair.first;
auto &lowered_subg = pair.second;
- auto indexed_ranks = lowered_subg->indexed_ranks();
+ auto const indexed_ranks = lowered_subg->indexed_ranks();
ir::OperationDumper dumper("Executor generation of Subgraph " +
std::to_string(subg_index.value()));
lowered_subg->graph().operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
- auto &options = (model_count > 1) ? *_voptions[subg_index.value()] : *_voptions[0];
auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create(
- std::move(lowered_subg), tracing_ctx.get(), options, executors)};
+ std::move(lowered_subg), tracing_ctx.get(), *_options, executors, model_index)};
executor->setIndexedRanks(indexed_ranks);
- executors->emplace(subg_index, std::move(executor));
+ executors->emplace(model_index, subg_index, std::move(executor));
}
/********************************
* Code generation phase finished
********************************/
- _state = State::COMPILED;
return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
}
-std::vector<std::shared_ptr<CompilerArtifact>> Compiler::compile(const char *package_file_path,
- const char *map_file_path)
-{
- // Allow one model compilation for pipeline
- if (_nnpkg->model_count() != 1)
- throw std::runtime_error{"Multiple models compilation for pipeline is not supported yet."};
- assert(_voptions.size() == 1);
-
- auto model = _nnpkg->primary_model();
- auto &options = *_voptions[0];
-
- std::string package_path(package_file_path);
- std::string partition_map_file;
-
- if (map_file_path)
- {
- partition_map_file = map_file_path;
- }
- else
- {
- partition_map_file = package_path + "/partition_map.json";
- }
-
- std::ifstream pmfs(partition_map_file);
- Json::Value root;
- pmfs >> root;
- const Json::Value &map = root["partition_map"];
- const Json::Value &np = root["num_partitions"];
-
- uint32_t num_graphs = 1;
-
- if (pmfs.is_open())
- {
- num_graphs = np.asUInt();
- for (uint32_t i = 0; i < (uint32_t)map.size(); ++i)
- {
- options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] =
- ir::SubgraphIndex{map[i].asUInt()};
- }
- }
- else
- {
- throw std::runtime_error("There is no partition map file");
- }
-
- if (!buildPartialGraph(num_graphs))
- {
- throw std::runtime_error("It doesn't support in case there are subgraphs");
- }
-
- // Set control flow backend for control flow operators
- {
- auto &builtin_id = backend::builtin::Config::ID;
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
- }
-
- // FIXME This is a workaround for bcq operations, should remove it
- {
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
- }
-
- // FIXME This is a workaround for bulk operations, should remove it
- {
- options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
- }
-
- verboseOptions(options);
-
- model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
- // Mandatory passes
- auto part = subg.partialgraphs();
- part->iterate([&](const ir::SubgraphIndex &, ir::Graph &partialgraph) {
- pass::PassRunner{}
- .append(std::make_unique<pass::ConstantOutputPass>(partialgraph))
- .append(std::make_unique<pass::OddOutputPass>(partialgraph))
- .run();
-
- // Optimizations
- pass::PassRunner{}
- .append(std::make_unique<pass::UnusedOperandEliminationPass>(partialgraph))
- .run();
- });
- });
-
- /***************************************************
- * Prepare compilation phase
- ***************************************************/
-
- // Compilable check
- // TODO: Support hybrid execution -
- // execution between interpreter and compiled executor (including control flow)
- if (options.disable_compile)
- {
- std::vector<std::shared_ptr<CompilerArtifact>> results;
- auto executors = std::make_shared<exec::Executors>();
-
- model->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
- });
- results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
- _state = State::COMPILED;
- return results;
- }
-
- // Mode check
- if (options.he_profiling_mode)
- checkProfilerConditions();
-
- /***************************************************
- * Backend independent analysis & optimization phase
- ***************************************************/
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(options.graph_dump_level);
- onert::dumper::dot::DotDumper dot_dumper_part(dump_level);
-
- // Lower: Assign backend
- std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- lowered_partialgraphs;
- model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
- auto part = subg.partialgraphs();
- part->iterate([&](const ir::SubgraphIndex &pindex, ir::Graph &partialgraph) {
- dot_dumper_part.dump(partialgraph,
- nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value()));
-
- // // Lower: Assign backend
- lowered_partialgraphs[pindex] =
- std::make_unique<compiler::LoweredGraph>(subg, partialgraph, options);
- });
- });
-
- for (auto &pair : lowered_partialgraphs)
- {
-
- const auto &partialgraph_index = pair.first;
- auto &lowered_partialgraph = pair.second;
- dot_dumper_part.dump(*lowered_partialgraph, "after_lower_subg_partialgraph-" +
- std::to_string(partialgraph_index.value()));
- }
-
- // Partial Graph shape inference
- std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
- createStaticShapeInferers(lowered_partialgraphs);
- // NOTE If partialgraph has subgraphs StaticShapeInferer may be called multiple times
- for (auto &pair : lowered_partialgraphs)
- {
- const auto &partialgraph_index = pair.first;
- const auto partial_inferer = inferers.at(partialgraph_index).get();
- partial_inferer->infer();
- partial_inferer->dump();
- }
-
- // Shape validation
- // TODO Move shape independent feature check from ShapeValidator to OperationValidator
- // TODO Move ShapeValidator into shape inference
- // - Check input tensor shape validation
- // - Check parameter value validation which valid value is depend on input tensor shape
- // - Output tensor shape validation check is needless because
- // static/dynamic shape inferer will make valid output shape
- for (auto &pair : lowered_partialgraphs)
- {
- auto &lowered_partialgraph = pair.second;
- compiler::ShapeValidator{lowered_partialgraph->graph()}();
- }
-
- /*************************************************************
- * Backend independent analysis & optimization phase finished
- *************************************************************/
- std::map<uint32_t, std::unique_ptr<compiler::LoweredGraph>> ordered;
- for (auto &pair : lowered_partialgraphs)
- {
- // const auto &partialgraph_index = pair.first;
- auto &lowered_partialgraph = pair.second;
-
- ordered.insert(make_pair(pair.first.value(), std::move(lowered_partialgraph)));
- }
-
- std::vector<std::shared_ptr<CompilerArtifact>> results;
- for (auto &pair : ordered)
- {
- auto executors = std::make_shared<exec::Executors>();
-
- const auto &partialgraph_index = ir::SubgraphIndex(pair.first);
- auto &lowered_partialgraph = pair.second;
- auto indexed_ranks = lowered_partialgraph->indexed_ranks();
- ir::OperationDumper dumper("Executor generation of Subgraph " +
- std::to_string(partialgraph_index.value()));
- lowered_partialgraph->graph().operations().iterate(
- [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
- auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_partialgraph), nullptr, options, executors)};
- executor->setIndexedRanks(indexed_ranks);
- executors->emplace(ir::SubgraphIndex{0}, std::move(executor));
-
- // It doesn't support tracing in case of partial graph
- results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
- }
-
- _nnpkg.reset();
- /********************************
- * Code generation phase finished
- ********************************/
- _state = State::COMPILED;
-
- return results;
-}
-
} // namespace compiler
-
} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/CompilerFactory.h"
+
+#include "MultiModelCompiler.h"
+
+#include "compiler/Compiler.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+// Returns the shared CompilerFactory instance.
+// The instance is a function-local static, so it is constructed on the first call
+// and the same object is handed out by every later call.
+CompilerFactory &CompilerFactory::get()
+{
+  static CompilerFactory instance;
+  return instance;
+}
+
+// Picks the compiler implementation from the number of models in the package:
+// `Compiler` when the package holds exactly one model, `MultiModelCompiler` for
+// any other count. Both are constructed from the same package and option list.
+std::unique_ptr<ICompiler>
+CompilerFactory::create(const std::shared_ptr<ir::NNPkg> &nnpkg,
+                        std::vector<std::unique_ptr<CompilerOptions>> &copts)
+{
+  const bool is_single_model = (nnpkg->model_count() == 1);
+  if (!is_single_model)
+    return std::make_unique<MultiModelCompiler>(nnpkg, copts);
+
+  return std::make_unique<Compiler>(nnpkg, copts);
+}
+
+} // namespace compiler
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "compiler/CompilerOptions.h"
+
+#include "../backend/builtin/Backend.h"
+
+#include "util/ConfigSource.h"
+#include "util/logging.h"
+
+#include <misc/string_helpers.h>
+
+namespace
+{
+
+using namespace onert;
+
+// Renders an opcode-to-backend table as "OpName=backend" entries joined by ", ".
+// Entries are emitted in the map's iteration order; an empty map gives an empty string.
+// The table is only read, so it is accepted by const reference.
+std::string getOpBackends(const std::unordered_map<ir::OpCode, std::string> &opcode_to_backend)
+{
+  std::string opbackends;
+
+  for (const auto &entry : opcode_to_backend)
+  {
+    // Separator goes in front of every entry except the first one
+    if (!opbackends.empty())
+      opbackends += ", ";
+
+    opbackends += ir::toString(entry.first);
+    opbackends += "=";
+    opbackends += entry.second;
+  }
+  return opbackends;
+}
+
+} // namespace
+
+namespace onert
+{
+namespace compiler
+{
+
+// Fills `index_to_backend` from a string of ';'-separated "index=backend" entries.
+// Entries that are empty strings are skipped. Each remaining entry is split on '=':
+// the first field is parsed with std::stoi and used as the operation index, the
+// second field is stored as the backend name.
+// NOTE `at()` and `std::stoi` are used as-is, so an entry without both fields or
+//      with a non-numeric index ends in an exception from those calls.
+void ManualSchedulerOptions::setBackendMap(const std::string &str)
+{
+  // TODO Support multiple subgraphs for manual scheduling
+  for (const auto &entry : nnfw::misc::split(str, ';'))
+  {
+    if (!entry.empty())
+    {
+      const auto fields = nnfw::misc::split(entry, '=');
+      const auto &index_str = fields.at(0);
+      const auto &backend_id = fields.at(1);
+      const auto op_index = static_cast<uint32_t>(std::stoi(index_str));
+      index_to_backend.emplace(ir::OperationIndex{op_index}, backend_id);
+    }
+  }
+}
+
+// Creates a CompilerOptions object and fills it from the global configuration,
+// i.e. the util::config::* entries read below.
+std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig()
+{
+  auto options = std::make_unique<CompilerOptions>();
+
+  // General options
+  options->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
+  options->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
+  options->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
+  options->executor = util::getConfigString(util::config::EXECUTOR);
+  options->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
+  options->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
+  options->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
+
+  // Manual scheduler options
+  auto &manual = options->manual_scheduler_options;
+
+  // Backend for all
+  // Default value for op_backend_all is first element in the backend list
+  manual.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
+
+// Opcode to Backend
+// OP() is expanded once per entry of ir/Operations.lst: it reads the matching
+// OP_BACKEND_<OpName> config string and registers it only when it is not empty
+#define OP(OpName)                                                                      \
+  {                                                                                     \
+    const auto &op_backend = util::getConfigString(util::config::OP_BACKEND_##OpName);  \
+    if (!op_backend.empty())                                                            \
+    {                                                                                   \
+      manual.opcode_to_backend[ir::OpCode::OpName] = op_backend;                        \
+    }                                                                                   \
+  }
+#include "ir/Operations.lst"
+#undef OP
+
+  // Index to Backend
+  manual.setBackendMap(util::getConfigString(util::config::OP_BACKEND_MAP));
+
+  return options;
+}
+
+// Writes fixed opcode-to-backend entries into the manual scheduler options.
+// Each assignment replaces whatever backend was previously registered for that opcode.
+void CompilerOptions::forceInternalOptions()
+{
+  auto &forced = manual_scheduler_options.opcode_to_backend;
+
+  // Set control flow backend for control flow operators
+  const auto &builtin_backend = backend::builtin::Config::ID;
+  forced[ir::OpCode::If] = builtin_backend;
+  forced[ir::OpCode::While] = builtin_backend;
+  forced[ir::OpCode::Permute] = builtin_backend;
+
+  // FIXME This is a workaround for bcq operations, should remove it
+  forced[ir::OpCode::BCQFullyConnected] = "bcq";
+  forced[ir::OpCode::BCQGather] = "bcq";
+
+  // FIXME This is a workaround for bulk operations, should remove it
+  forced[ir::OpCode::Bulk] = "trix";
+}
+
+// Prints the option values through VERBOSE(Compiler), one option per statement.
+// std::boolalpha is inserted by the first statement and std::noboolalpha by the
+// last one, so the flags in between are written with bool-alpha formatting.
+void CompilerOptions::verboseOptions()
+{
+  auto &manual = manual_scheduler_options;
+
+  VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
+
+  // General options
+  VERBOSE(Compiler) << "backend_list : "
+                    << nnfw::misc::join(backend_list.begin(), backend_list.end(), "/") << std::endl;
+  VERBOSE(Compiler) << "trace_filepath : " << trace_filepath << std::endl;
+  VERBOSE(Compiler) << "graph_dump_level : " << graph_dump_level << std::endl;
+  VERBOSE(Compiler) << "executor : " << executor << std::endl;
+
+  // Manual scheduler options
+  VERBOSE(Compiler) << "manual backend_for_all : " << manual.backend_for_all << std::endl;
+  VERBOSE(Compiler) << "manual_scheduler_options : " << getOpBackends(manual.opcode_to_backend)
+                    << std::endl;
+
+  // Flags
+  VERBOSE(Compiler) << "he_scheduler : " << he_scheduler << std::endl;
+  VERBOSE(Compiler) << "he_profiling_mode : " << he_profiling_mode << std::endl;
+  VERBOSE(Compiler) << "fp16_enable : " << fp16_enable << std::endl << std::noboolalpha;
+}
+
+} // namespace compiler
+} // namespace onert
// Create contexts
auto whole_op_order = lgraph.graph().topolSortOperations();
- for (auto &pair : context_data_map)
+ for (auto &&pair : context_data_map)
{
auto backend = pair.first;
auto &data = pair.second;
ExecutorFactory::ExecutorFactory()
{
_map["Linear"] = createLinearExecutor;
- _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, std::placeholders::_4, false);
- _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, std::placeholders::_4, true);
+ _map["Dataflow"] =
+ std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, false);
+ _map["Parallel"] =
+ std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, std::placeholders::_4, std::placeholders::_5, true);
}
exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::Executors> &executors)
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index)
{
- return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors);
+ return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors,
+ index);
}
void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
}
void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
- const std::shared_ptr<exec::Executors> &executors,
- const backend::BackendContexts &backend_contexts)
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const backend::BackendContexts &backend_contexts,
+ const ir::ModelIndex &index)
{
- for (auto &pair : backend_contexts)
+ for (auto &&pair : backend_contexts)
{
auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
if (builtin_context != nullptr)
auto builtin_kernel_gen = builtin_context->kernel_gen;
builtin_kernel_gen->setTensorRegistries(tensor_regs);
builtin_kernel_gen->setExecutors(executors);
+ builtin_kernel_gen->setModelIndex(index);
}
}
}
{
std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
- for (auto &pair : backend_contexts)
+ for (auto &&pair : backend_contexts)
{
// NOTE builtin backend must be processed lastly.
// This is because of Permute layer's specialty which is the only operation that could have
exec::IExecutor *ExecutorFactory::createLinearExecutor(
std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
- const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index)
{
auto &graph = lowered_graph->graph();
auto order = Linear::linearize(*lowered_graph);
Linear::dump(*lowered_graph, order);
- for (auto &pair : backend_contexts)
+ for (auto &&pair : backend_contexts)
{
pair.second->genTensors();
}
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts, index);
ExecutionBuilder builder;
}
// Generate kernels
- for (auto &pair : ordered_contexts)
+ for (auto &&pair : ordered_contexts)
{
auto codes = pair.second->genKernels();
- for (auto &pair : codes)
+ for (auto &&pair : codes)
{
auto &op_ind = pair.first;
auto &fn_seq = pair.second;
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
- const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors,
- bool parallel)
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index, bool parallel)
{
backend::BackendContexts backend_contexts =
createBackendContexts(*lowered_graph, options.executor == "Linear");
(lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs()) |
ir::Remove::DUPLICATED | ir::Remove::UNDEFINED);
- for (auto &pair : backend_contexts)
+ for (auto &&pair : backend_contexts)
{
pair.second->genTensors();
}
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts, index);
ExecutionBuilder builder;
auto ordered_contexts = orderBackendContext(backend_contexts);
// Generate kernels
- for (auto &pair : ordered_contexts)
+ for (auto &&pair : ordered_contexts)
{
auto codes = pair.second->genKernels();
- for (auto &pair : codes)
+ for (auto &&pair : codes)
{
auto &op_ind = pair.first;
auto &fn_seq = pair.second;
#include "backend/ITensor.h"
#include "compiler/LoweredGraph.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
#include <deque>
#include <unordered_map>
exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::Executors> &executors);
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index);
private:
ExecutorFactory();
static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
const backend::BackendContexts &backend_contexts);
static void prepareBuiltinBackend(const TensorRegistries &tensor_regs,
- const std::shared_ptr<exec::Executors> &executors,
- const backend::BackendContexts &backend_contexts);
+ const std::shared_ptr<exec::IExecutors> &executors,
+ const backend::BackendContexts &backend_contexts,
+ const ir::ModelIndex &index);
static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
orderBackendContext(const backend::BackendContexts &backend_contexts);
static exec::IExecutor *createLinearExecutor(
std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
- const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors);
- static exec::IExecutor *
- createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const util::TracingCtx *tracing_ctx,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::Executors> &executors, bool parallel);
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index);
+ static exec::IExecutor *createDataflowExecutor(
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index, bool parallel);
private:
std::unordered_map<
std::string,
std::function<exec::IExecutor *(
std::unique_ptr<compiler::LoweredGraph>, const util::TracingCtx *tracing_ctx,
- const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)>>
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::IExecutors> &executors,
+ const ir::ModelIndex &index)>>
_map;
};
const auto &op_seq_inputs = _lowered_graph.graph().getInputs();
const auto &op_seq_outputs = _lowered_graph.graph().getOutputs();
- for (auto &op_idx : op_seq)
+ for (const auto &op_idx : op_seq)
{
const auto &node = operations.at(op_idx);
- for (auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
+ for (const auto &ind : node.getInputs() | ir::Remove::UNDEFINED)
{
if (node.opcode() == ir::OpCode::ConvertFp32ToFp16 || op_seq_inputs.contains(ind))
continue;
VERBOSE(Fp32ToFp16Converter) << "Input Operand " << ind << ": fp16" << std::endl;
}
- for (auto &ind : node.getOutputs())
+ for (const auto &ind : node.getOutputs())
{
if (node.opcode() == ir::OpCode::ConvertFp16ToFp32 || op_seq_outputs.contains(ind))
continue;
// | |
// [OPERATION] [OPERATION]
//
- for (auto &op_seq_ind : found_input_in_op_seqs->second)
+ for (const auto &op_seq_ind : found_input_in_op_seqs->second)
{
auto found_in_fp32_to_fp16 = _list_fp32_to_fp16.find(op_seq_ind);
if (found_in_fp32_to_fp16 != _list_fp32_to_fp16.end())
OpSeqIndexList list;
for (const auto &it : opseq_map_to_delete)
{
- auto &opseq_ind_fp16_to_fp32 = it.first;
+ const auto &opseq_ind_fp16_to_fp32 = it.first;
if (list.find(opseq_ind_fp16_to_fp32) == list.end())
{
list.emplace(opseq_ind_fp16_to_fp32);
}
- for (auto &opseq_ind_fp32_to_fp16 : it.second)
+ for (const auto &opseq_ind_fp32_to_fp16 : it.second)
{
if (list.find(opseq_ind_fp32_to_fp16) == list.end())
{
auto &op_seq_fp16_to_fp32 = op_seqs.at(op_seq_ind_fp16_to_fp32);
auto &input_ind_fp16_to_fp32 = op_seq_fp16_to_fp32.getInputs().at(0);
- for (auto &op_seq_ind_fp32_to_fp16 : it.second)
+ for (const auto &op_seq_ind_fp32_to_fp16 : it.second)
{
auto &op_seq_fp32_to_fp16 = op_seqs.at(op_seq_ind_fp32_to_fp16);
assert(op_seq_fp32_to_fp16.size() == 1);
auto found_next_to_fp16 = input_to_op_seqs.find(output_ind_fp32_to_fp16);
assert(found_next_to_fp16 != input_to_op_seqs.end());
- for (auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
+ for (const auto &op_seq_ind_next_to_fp16 : found_next_to_fp16->second)
{
manipulateInput(op_seq_ind_next_to_fp16, output_ind_fp32_to_fp16, input_ind_fp16_to_fp32);
}
auto &operations = _lowered_graph.graph().operations();
auto &op_seqs = _lowered_graph.op_seqs();
- for (auto &op_seq_ind : list_to_delete_op_seqs)
+ for (const auto &op_seq_ind : list_to_delete_op_seqs)
{
auto &op_seq = op_seqs.at(op_seq_ind);
assert(op_seq.size() == 1);
VERBOSE(Fp32ToFp16Converter) << "Delete Node " << first_node_ind << std::endl;
// Uses
- for (auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &ind : first_node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
obj.removeUse(first_node_ind);
}
// Def
- for (auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
+ for (const auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
assert(obj.getDef() == first_node_ind);
}
// Operand
- for (auto &ind : list_to_delete_ops)
+ for (const auto &ind : list_to_delete_ops)
{
operands.remove(ind);
VERBOSE(Fp32ToFp16Converter) << "Operand " << ind << " is removed" << std::endl;
// Find free time for data transferring and insert it into backend taskset. This is needed:
// 1. Time for multiple permutations for this node's input is found correctly
// 2. If backend==cpu, then free time for this node must come after permutations
- for (auto &it : transfer_st_exec_time)
+ for (auto &&it : transfer_st_exec_time)
{
if (_is_parallel_exec)
{
ExecTime et(backends);
for (int i = 0; i < op_names.size(); ++i)
{
- for (auto &backend : backends)
+ for (const auto backend : backends)
setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
}
et.storeOperationsExecTime();
ExecTime et(backends);
for (const auto &backend : backends)
{
- for (auto &other_backend : backends)
+ for (const auto other_backend : backends)
{
if (backend == other_backend)
continue;
lowerGraph(options);
}
-// TODO Design better class and constructor to represent parent_graph
-LoweredGraph::LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph,
- const CompilerOptions &options)
- : _graph{graph}, _parent_graph{parent_graph}
-{
- lowerGraph(options);
-}
-
void LoweredGraph::lowerGraph(const CompilerOptions &options)
{
// Build backend contexts
// 2. Backend per operation type
std::unordered_map<ir::OpCode, backend::Backend *> op_type_map;
- for (auto &pair : manual_options.opcode_to_backend)
+ for (const auto &pair : manual_options.opcode_to_backend)
{
op_type_map.emplace(pair.first, BackendManager::get().get(pair.second));
}
});
// 3. Backend per operation
- for (auto &pair : manual_options.index_to_backend)
+ for (const auto &pair : manual_options.index_to_backend)
{
const auto &key = pair.first;
const auto &val = pair.second;
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MultiModelCompiler.h"
+
+#include "ExecutorFactory.h"
+#include "ShapeValidator.h"
+#include "pass/ConstantOutputPass.h"
+#include "pass/OddOutputPass.h"
+#include "pass/PassRunner.h"
+#include "pass/UnusedOperandEliminationPass.h"
+#include "../dumper/dot/DotDumper.h"
+#include "../exec/Executors.h"
+#include "../ir/OperationDumper.h"
+#include "../ir/verifier/Verifier.h"
+
+#include "compiler/StaticShapeInferer.h"
+
+#include <misc/string_helpers.h>
+
+namespace onert
+{
+namespace compiler
+{
+
+// Keeps a shared reference to the package and stores one non-owning pointer per entry of
+// copts, in the same order. Ownership of the options stays with the caller's vector.
+MultiModelCompiler::MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+                                       std::vector<std::unique_ptr<CompilerOptions>> &copts)
+  : _nnpkg{nnpkg}, _voptions{}
+{
+  // A package holding exactly one model is not expected here (checked in debug builds only)
+  assert(nnpkg->model_count() != 1);
+
+  for (const auto &option_holder : copts)
+    _voptions.push_back(option_holder.get());
+}
+
+// Compile every model of the NN package and generate one executor per lowered subgraph.
+//
+// Phases: check and apply the options, run the mandatory and optimization passes, lower each
+// subgraph, run static shape inference and shape validation, then create the executors.
+//
+// @return Artifact holding the generated executors and the tracing context (always null here,
+//         since tracing is not supported for multiple models yet)
+// @throw std::runtime_error if the package was already released by a previous call, an option
+//        entry is null, profiling mode is requested, the number of options differs from the
+//        number of models, or the package contains no model
+std::shared_ptr<CompilerArtifact> MultiModelCompiler::compile(void)
+{
+  // The package is released after lowering (see _nnpkg.reset() below), so calling compile()
+  // again would dereference a null pointer. Fail explicitly instead.
+  if (!_nnpkg)
+    throw std::runtime_error{"NN package is not available: compile() can be called only once"};
+
+  /***************************************************
+   * Prepare compilation phase
+   ***************************************************/
+  for (auto options : _voptions)
+  {
+    if (!options)
+      throw std::runtime_error{"Empty compile option"};
+
+    // Mode check
+    // TODO handle option for each model
+    if (options->he_profiling_mode)
+      throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet");
+
+    options->forceInternalOptions();
+    options->verboseOptions();
+  }
+
+  // NYI: allow one model compilation
+  auto const model_count = _nnpkg->model_count();
+  if (model_count != _voptions.size())
+    throw std::runtime_error{"Model count and option vector size mismatch"};
+
+  // _voptions[0] is read below to pick the dump level, so an empty package must be rejected
+  // before that access
+  if (model_count == 0)
+    throw std::runtime_error{"No model to compile"};
+
+  for (uint16_t i = 0; i < model_count; i++)
+  {
+    _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+      // Mandatory passes
+      pass::PassRunner{}
+        .append(std::make_unique<pass::ConstantOutputPass>(subg))
+        .append(std::make_unique<pass::OddOutputPass>(subg))
+        .run();
+
+      // Optimizations
+      pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+    });
+  }
+
+  /***************************************************
+   * Backend independent analysis & optimization phase
+   ***************************************************/
+  // TODO Handle dump level for each model
+  auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level);
+  onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+  // Tracing context
+  // TODO Support tracing_ctx for multiple model
+  std::unique_ptr<util::TracingCtx> tracing_ctx = nullptr;
+
+  // Model edge context: copy model edge context
+  // (copied now because the package itself is released right after lowering)
+  auto model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges());
+
+  // Lower: Assign backend
+  std::unordered_map<ir::ModelIndex,
+                     std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>>
+    lowered_subgs;
+
+  for (uint16_t i = 0; i < model_count; i++)
+  {
+    auto const model_index = ir::ModelIndex{i};
+    auto model = _nnpkg->model(model_index);
+
+    model->iterate([&](const ir::SubgraphIndex &subg_index, ir::Graph &subg) {
+      dot_dumper.dump(subg,
+                      nnfw::misc::str("before_lower_model-", i, "-subg-", subg_index.value()));
+      // Lower: Assign backend (each model is lowered with its own options)
+      lowered_subgs[model_index][subg_index] =
+        std::make_unique<compiler::LoweredGraph>(subg, *_voptions[i]);
+      // Set tracing_ctx for copied graph
+      if (tracing_ctx != nullptr)
+        tracing_ctx->setSubgraphIndex(&(lowered_subgs[model_index][subg_index]->graph()),
+                                      subg_index.value());
+    });
+  }
+
+  // The package is not used past this point; drop this compiler's reference to it
+  _nnpkg.reset();
+
+  for (const auto &pair : lowered_subgs)
+  {
+    const auto &model_index = pair.first;
+    const auto &model_lsubg = pair.second;
+
+    for (const auto &pair_inner : model_lsubg)
+    {
+      const auto &subg_index = pair_inner.first;
+      const auto &lowered_subg = pair_inner.second;
+      dot_dumper.dump(*lowered_subg, nnfw::misc::str("after_lower_model-", model_index.value(),
+                                                     "-subg-", subg_index.value()));
+    }
+  }
+
+  // Shape inference.
+  for (auto &&pair : lowered_subgs)
+  {
+    auto &model_lsubgs = pair.second;
+    // Run the StaticShapeInferer of the primary subgraph. All child StaticShapeInferers are
+    // called recursively
+    std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+      StaticShapeInferer::createStaticShapeInferers(model_lsubgs);
+
+    const auto primary_subg_idx = ir::SubgraphIndex{0};
+    inferers.at(primary_subg_idx)->infer();
+
+    for (const auto &pair_inferer : inferers)
+    {
+      const auto inferer = pair_inferer.second.get();
+      inferer->dump();
+    }
+  }
+
+  // Shape validation
+  // TODO Move shape independent feature check from ShapeValidator to OperationValidator
+  // TODO Move ShapeValidator into shape inference
+  //      - Check input tensor shape validation
+  //      - Check parameter value validation which valid value is depend on input tensor shape
+  //      - Output tensor shape validation check is needless because
+  //        static/dynamic shape inferer will make valid output shape
+  for (const auto &pair : lowered_subgs)
+  {
+    const auto &model_lsubgs = pair.second;
+
+    for (const auto &pair_inner : model_lsubgs)
+    {
+      const auto &lowered_subg = pair_inner.second;
+      compiler::ShapeValidator{lowered_subg->graph()}();
+    }
+  }
+
+  /*************************************************************
+   *  Backend independent analysis & optimization phase finished
+   *************************************************************/
+  auto executors = std::make_shared<exec::Executors>(std::move(model_edges));
+  for (auto &&pair : lowered_subgs)
+  {
+    auto const &model_index = pair.first;
+    auto &model_lsubgs = pair.second;
+
+    for (auto &&pair_inner : model_lsubgs)
+    {
+      auto const subg_index = pair_inner.first;
+      auto &lowered_subg = pair_inner.second;
+      // Read the ranks before lowered_subg is moved into the executor factory below
+      auto const indexed_ranks = lowered_subg->indexed_ranks();
+
+      ir::OperationDumper dumper("Executor generation of Subgraph " +
+                                 std::to_string(subg_index.value()));
+      lowered_subg->graph().operations().iterate(
+        [&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
+
+      auto &options = *_voptions[model_index.value()];
+      auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create(
+        std::move(lowered_subg), tracing_ctx.get(), options, executors, model_index)};
+      executor->setIndexedRanks(indexed_ranks);
+      executors->emplace(model_index, subg_index, std::move(executor));
+    }
+  }
+
+  /********************************
+   * Code generation phase finished
+   ********************************/
+  return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
+}
+
+} // namespace compiler
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file MultiModelCompiler.h
+ * @brief This file contains MultiModelCompiler class to define and run compilation phase
+ */
+
+#ifndef __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
+#define __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
+
+#include "compiler/CompilerOptions.h"
+#include "compiler/ICompiler.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class to compile an NN package, producing executors for every model in it
+ */
+class MultiModelCompiler final : public ICompiler
+{
+public:
+ /**
+ * @brief Construct a new Compiler object for NN package
+ * @param[in] nnpkg NN package to compile
+ * @param[in] copts Compiler option vector for each model in package.
+ *                  Only raw pointers to the entries are stored, so the vector
+ *                  and its elements must outlive this compiler
+ */
+ MultiModelCompiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts);
+
+ /**
+ * @brief Destroy the MultiModelCompiler object
+ */
+ ~MultiModelCompiler() = default;
+
+public:
+ /**
+ * @brief Do compilation with the options
+ *
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
+ */
+ std::shared_ptr<CompilerArtifact> compile(void);
+
+private:
+ /**
+ * @brief Get the subgraph with index 0 of the package's primary model
+ *
+ * @note NOTE(review): compile() resets _nnpkg, so this looks unsafe to call
+ *       after compilation - confirm before using it
+ */
+ std::shared_ptr<ir::Graph> &primary_subgraph()
+ {
+ return _nnpkg->primary_model()->at(ir::SubgraphIndex{0});
+ }
+
+private:
+ // NN package to compile
+ std::shared_ptr<ir::NNPkg> _nnpkg;
+ // Non-owning pointers to the per-model compiler options, one entry per element of copts
+ std::vector<CompilerOptions *> _voptions;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_MULTI_MODEL_COMPILER_H__
#include "util/ShapeInference.h"
#include "util/logging.h"
+#include <misc/polymorphic_downcast.h>
+
#include <sstream>
#include <stdexcept>
});
}
+// Create one StaticShapeInferer per lowered subgraph and connect them for control flow.
+//
+// For every If/While operation found in a subgraph, the inferer of that subgraph gets the
+// inferers of the referenced child subgraphs and observers on their inputs, and one child
+// inferer gets an observer on the outputs of the control flow operation.
+//
+// Lookups use at(), so a child subgraph index that is not a key of lowered_subgs makes
+// at() throw std::out_of_range.
+//
+// Returns the map from subgraph index to its inferer.
+std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>>
+StaticShapeInferer::createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<LoweredGraph>> &lowered_subgs)
+{
+ // Allocate StaticShapeInferer per each subgraph
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers;
+ for (auto &&pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+ inferers[subg_index] = std::make_unique<StaticShapeInferer>(lowered_subg.get());
+ }
+
+ // Append observers in all StaticShapeInferers
+ // (done in a second loop so that every inferer already exists when it is looked up)
+ for (auto &&pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+
+ // TODO: Change this iteration for all to controlflow iteration
+ lowered_subg->graph().operations().iterate(
+ [&](const ir::OperationIndex &, const ir::Operation &op) {
+ // A function to append child inferers. These make it possible for a StaticShapeInferer to
+ // call StaticShapeInferers of child subgraphs recursively
+ auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) {
+ auto *child_inferer = inferers.at(child_subg_idx).get();
+ inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer);
+ };
+
+ // A function to append subgraph input observers. This makes it possible for a
+ // StaticShapeInferer to update inputs of child subgraphs
+ auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> child_subg_inputs;
+ auto &child_subg = lowered_subgs.at(child_subg_idx)->graph();
+ for (const auto &input_idx : child_subg.getInputs())
+ {
+ auto operand_ptr = child_subg.operands().getRawPtr(input_idx);
+ child_subg_inputs.emplace_back(operand_ptr);
+ }
+ inferers.at(subg_index)
+ ->appendSubgInputObserver(child_subg_idx,
+ std::make_unique<OperandObserver>(child_subg_inputs));
+ };
+
+ // A function to set controlflow output observers. This makes it possible for a
+ // StaticShapeInferer to update outputs of parent controlflow operations
+ auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> cf_outputs;
+ auto &subg = lowered_subg->graph();
+ for (const auto &output_idx : op.getOutputs())
+ {
+ auto operand_ptr = subg.operands().getRawPtr(output_idx);
+ cf_outputs.emplace_back(operand_ptr);
+ }
+ inferers.at(child_subg_idx)
+ ->setControlflowOutputObserver(std::make_unique<OperandObserver>(cf_outputs));
+ };
+
+ // Append Observers in a StaticShapeInferer
+ if (op.opcode() == ir::OpCode::If)
+ {
+ const auto &if_op = nnfw::misc::polymorphic_downcast<const ir::operation::If &>(op);
+
+ appendChildInferer(if_op.param().then_subg_index);
+ appendChildInferer(if_op.param().else_subg_index);
+
+ appendSubgraphInputObserver(if_op.param().then_subg_index);
+ appendSubgraphInputObserver(if_op.param().else_subg_index);
+
+ // Only the "then" subgraph's inferer gets the output observer
+ setControlFlowOutputObserver(if_op.param().then_subg_index);
+ }
+ else if (op.opcode() == ir::OpCode::While)
+ {
+ const auto &while_op = nnfw::misc::polymorphic_downcast<const ir::operation::While &>(op);
+
+ appendChildInferer(while_op.param().cond_subg_index);
+ appendChildInferer(while_op.param().body_subg_index);
+
+ appendSubgraphInputObserver(while_op.param().cond_subg_index);
+ appendSubgraphInputObserver(while_op.param().body_subg_index);
+
+ // Only the "body" subgraph's inferer gets the output observer
+ setControlFlowOutputObserver(while_op.param().body_subg_index);
+ }
+ });
+ }
+
+ return inferers;
+}
+
void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
{
auto &operands = _lowered_subg->graph().operands();
auto origin_output_shape = op.param().origin_output_shapes[0];
// TODO: more check for valid batch request
- assert(cur_input_shape.dim(0) >= origin_output_shape.dim(0));
- assert(cur_input_shape.dim(0) % origin_output_shape.dim(0) == 0);
+ if ((cur_input_shape.dim(0) < origin_output_shape.dim(0)) ||
+ (cur_input_shape.dim(0) % origin_output_shape.dim(0) != 0))
+ {
+ throw std::runtime_error("StaticShapeInferer " + op.name() + ": Not supported batch size");
+ }
size_t batch_multiplier = cur_input_shape.dim(0) / origin_output_shape.dim(0);
ir::Shape new_shape;
backend::ITensor *getITensor(ir::OperandIndex ind) const
{
- for (auto &tensor_reg : _tensor_regs)
+ for (auto &&tensor_reg : _tensor_regs)
{
auto tensor = tensor_reg->getITensor(ind);
if (tensor)
VERBOSE(OddOutputPass) << "Case 1 : An operand which is a model output and a model input"
<< std::endl;
- for (auto &ind : outputs)
+ for (const auto &ind : outputs)
{
if (_graph.getInputs().contains(ind))
{
VERBOSE(OddOutputPass) << "Case 2 : Two or more duplicated outputs" << std::endl;
std::unordered_set<ir::OperandIndex> occurence;
- for (auto &ind : outputs)
+ for (auto &&ind : outputs)
{
auto &obj = _graph.operands().at(ind);
if (occurence.count(ind) == 0)
void PassRunner::run()
{
- for (auto &pass : _passes)
+ for (auto &&pass : _passes)
{
VERBOSE(PassRunner) << "Start running '" << pass->id() << "'" << std::endl;
pass->run();
}
}
- for (auto &operation : remove_list)
+ for (const auto &operation_index : remove_list)
{
- object.removeUse(operation);
+ object.removeUse(operation_index);
}
}
}
namespace exec
{
-Execution::Execution(const std::shared_ptr<Executors> &executors) : _executors{executors}
+Execution::Execution(const std::shared_ptr<IExecutors> &executors) : _executors{executors}
{
assert(executors != nullptr);
- assert(executors->at(ir::SubgraphIndex{0}) != nullptr);
+ assert(executors->entryExecutor() != nullptr);
_io_desc.inputs.resize(_executors->inputSize());
_io_desc.outputs.resize(_executors->outputSize());
- sem_init(&_async_io_descs_sem, 0, 1);
}
void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_shape)
_io_desc.inputs.at(index.value()) = std::make_unique<InputDesc>(info, buffer, length, layout);
}
-void Execution::createNewAsyncDesc(uint32_t count)
-{
- IODescription *_async_io_desc = new IODescription;
- _async_io_desc->inputs.resize(primary_subgraph().getInputs().size());
- _async_io_desc->outputs.resize(primary_subgraph().getOutputs().size());
-
- _async_io_descs.push_back({_async_io_desc, count});
-}
-
-void Execution::setFinish() { finished = true; }
-
-bool Execution::isEmptyQueue()
-{
- asyncIoDescSemWait();
- bool ret = _async_io_descs.empty();
- if (!ret)
- {
- for (uint32_t idx = 0; idx < _async_io_descs.front().first->inputs.size(); idx++)
- {
- if (_async_io_descs.front().first->inputs.at(idx).get() == nullptr)
- {
- ret = true;
- break;
- }
- }
- }
- asyncIoDescSemPost();
- return ret;
-}
-
-void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer, size_t length,
- ir::Layout layout)
-{
- const auto info = _executors->inputInfo(index);
- IODescription *_async_io_desc = _async_io_descs.back().first;
-
- {
- auto input_shape_sig = _async_io_desc->dynamic_input_shapes.find(index);
- auto size_required =
- (input_shape_sig != _async_io_desc->dynamic_input_shapes.end())
- ? input_shape_sig->second.num_elements() * onert::ir::sizeOfDataType(info.typeInfo().type())
- : info.total_size();
-
- if (length < size_required)
- {
- throw std::runtime_error{"Too small length"};
- }
- }
- void *_buffer = (void *)malloc(length);
- if (_buffer == NULL)
- {
- throw std::runtime_error{"malloc failed"};
- }
- memcpy(_buffer, buffer, length);
-
- _async_io_desc->inputs.at(index.value()) =
- std::make_unique<InputDesc>(info, _buffer, length, layout);
-}
-
-void Execution::executeAsyncOutput(const ir::IOIndex &index, void *buffer, size_t length,
- ir::Layout layout)
-{
- const auto info = _executors->outputInfo(index);
- IODescription *_async_io_desc = _async_io_descs.front().first;
-
- if (length < info.total_size())
- {
- throw std::runtime_error{"Too small length"};
- }
-
- _async_io_desc->outputs.at(index.value()) =
- std::make_unique<OutputDesc>(info, buffer, length, layout);
-}
-
// TODO Remove default parameter
void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, const ir::Shape &shape,
const void *buffer, size_t length, ir::Layout layout)
VERBOSE(Execution) << "Execution finished" << std::endl;
}
-void Execution::AsyncExecute()
-{
- VERBOSE(Execution) << "Start Async execution" << std::endl;
- if (_async_io_descs.empty())
- {
- VERBOSE(Execution) << "The input is not ready" << std::endl;
- return;
- }
-
- primary_executor()->execute(*_async_io_descs.front().first);
-}
-
void Execution::startExecute()
{
VERBOSE(Execution) << "Create asynchronous execution thread" << std::endl;
}
}
+// NNAPI returns a failure if ANeuralNetworksExecution_getOutputOperandRank or
+// ANeuralNetworksExecution_getOutputOperandDimensions is called before execution.
+// On the other hand, the NNFW API returns the static shape inference result if
+// nnfw_output_tensorinfo is called before execution.
+// To handle both cases, this method returns the static shape inference result, and the failure
+// is handled in the NNAPI frontend.
ir::Shape Execution::getOutputShape(ir::IOIndex ind) const
{
if (!isFinished())
- throw std::runtime_error("Cannot get output shape before execution is finished");
+ return _executors->outputInfo(ind).shape();
const auto &output_desc = _io_desc.outputs.at(ind.value());
return output_desc->info.shape();
}
-void Execution::asyncIoDescSemWait() { sem_wait(&_async_io_descs_sem); }
-
-void Execution::asyncIoDescSemPost() { sem_post(&_async_io_descs_sem); }
-
-void Execution::runInference()
-{
- uint32_t inference_cnt;
- uint32_t output_sz = primary_subgraph().getOutputs().size();
- while (true)
- {
- if (isEmptyQueue())
- {
- if (isFinished())
- {
- if (!next_exes.empty())
- {
- for (uint32_t i = 0; i < next_exes.size(); i++)
- {
- std::get<0>(next_exes[i])->setFinish();
- }
- }
- else
- {
- sholudStop();
- }
- break;
- }
- }
- else
- {
- for (uint32_t i = 0; i < output_sz; i++)
- {
- auto opidx = primary_subgraph().getOutputs().at(i);
- auto shape = primary_subgraph().operands().at(opidx).shape();
- auto dtype = primary_subgraph().operands().at(opidx).typeInfo().type();
- auto rank = shape.rank();
- uint32_t tensor_size = 1;
- for (int32_t j = 0; j < rank; j++)
- {
- tensor_size *= shape.dim(j);
- }
- if (dtype == onert::ir::DataType::FLOAT32 || dtype == onert::ir::DataType::INT32 ||
- dtype == onert::ir::DataType::UINT32)
- tensor_size *= 4;
- else if (dtype == onert::ir::DataType::INT64)
- tensor_size *= 8;
- void *_buffer = (void *)malloc(tensor_size);
- if (_buffer == NULL)
- {
- throw std::runtime_error{"malloc failed"};
- }
- executeAsyncOutput(onert::ir::IOIndex(i), _buffer, tensor_size);
- }
- AsyncExecute();
-
- // set inputs of next execution
- auto _io_desc = getAsyncIoDescs()->front().first;
- inference_cnt = getAsyncIoDescs()->front().second;
- getAsyncIoDescs()->pop_front();
-
- for (uint32_t i = 0; i < next_exes.size(); i++)
- {
- auto next_exe = std::get<0>(next_exes[i]);
- auto o_index = std::get<1>(next_exes[i]);
- auto i_index = std::get<2>(next_exes[i]);
-
- next_exe->asyncIoDescSemWait();
- auto next_io_descs = next_exe->getAsyncIoDescs();
- bool exist = false;
- for (auto iter = next_io_descs->begin(); iter != next_io_descs->end(); iter++)
- {
- if (inference_cnt == iter->second)
- {
- exist = true;
- }
- }
-
- if (!exist)
- {
- next_exe->createNewAsyncDesc(inference_cnt);
- }
- for (auto iter = next_io_descs->begin(); iter != next_io_descs->end(); iter++)
- {
- if (inference_cnt == iter->second)
- {
- const auto input_index = next_exe->primary_subgraph().getInputs().at(i_index.value());
- const auto info = next_exe->primary_subgraph().operands().at(input_index).info();
-
- size_t length = _io_desc->outputs[o_index.value()]->size;
- void *_buffer = (void *)malloc(length);
- if (_buffer == NULL)
- {
- throw std::runtime_error{"malloc failed"};
- }
- memcpy(_buffer, _io_desc->outputs[o_index.value()]->buffer, length);
-
- iter->first->inputs.at(i_index.value()) = std::make_unique<onert::exec::InputDesc>(
- info, _buffer, length, onert::ir::Layout::NHWC);
- break;
- }
- }
- next_exe->asyncIoDescSemPost();
- }
-
- if (next_exes.empty())
- {
- std::vector<void *> results;
- for (uint32_t i = 0; i < _io_desc->outputs.size(); i++)
- {
- size_t length = _io_desc->outputs[i]->size;
- void *_buffer = (void *)malloc(length);
- if (_buffer == NULL)
- {
- throw std::runtime_error{"malloc failed"};
- }
- memcpy(_buffer, _io_desc->outputs[i]->buffer, length);
- results.push_back(_buffer);
- }
- _async_results.push_back(results);
- }
-
- for (uint32_t i = 0; i < _io_desc->inputs.size(); i++)
- {
- auto p = _io_desc->inputs.at(i).release();
- if (p)
- {
- free((void *)p->buffer);
- delete p;
- }
- }
- for (uint32_t i = 0; i < _io_desc->outputs.size(); i++)
- {
- auto p = _io_desc->outputs.at(i).release();
- if (p)
- {
- free(p->buffer);
- delete p;
- }
- }
- delete _io_desc;
- }
- }
-}
-
-bool Execution::stopWait(void) const { return stop_wait; }
-
-void Execution::sholudStop() { stop_wait = true; }
-
} // namespace exec
} // namespace onert
#include "exec/Execution.h"
#include "compiler/Compiler.h"
+#include "compiler/CompilerFactory.h"
#include "ir/Graph.h"
#include "ir/operation/BinaryArithmetic.h"
#include "util/TracingCtx.h"
std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
};
+// Test fixture that builds a package of three single-subgraph models, each holding one
+// elementwise ADD, connects them with model edges, and compiles the package.
+// The compilation result is stored in `artifact`.
+class CompiledMockUpMultiModel
+{
+public:
+ // Build the three graphs and the edge information, then compile them
+ CompiledMockUpMultiModel()
+ {
+ // Model0: a float elementwise add operation
+ // Model0 input: lhs0, rhs0
+ // Model0 output: add result (result0)
+
+ // Model1: a qasymm8 elementwise add operation
+ // Model1 input: result0 (rhs1 is a constant operand, not a model input)
+ // Model1 output: add result (result1)
+
+ // Model2: a float elementwise add operation
+ // Model2 input: result0, result1
+ // Model2 output: add result (result2)
+
+ // constant: rhs1
+ // result0 <= (lhs0 + rhs0)
+ // result1 <= (result0 + rhs1)
+ // result2 <= (result0 + result1)
+ // lhs0, rhs0, rhs1, result0, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+
+ // Update edge information
+ // Package inputs are Model0's two inputs; the package output is Model2's output
+ edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{0});
+ edges.pkg_inputs.emplace_back(ModelIndex{0}, SubgraphIndex{0}, IOIndex{1});
+ edges.pkg_outputs.emplace_back(ModelIndex{2}, SubgraphIndex{0}, IOIndex{0});
+ // From
+ const auto result0 = IODesc{ModelIndex{0}, SubgraphIndex{0}, IOIndex{0}};
+ const auto result1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}};
+ // To
+ const auto lhs1 = IODesc{ModelIndex{1}, SubgraphIndex{0}, IOIndex{0}};
+ const auto lhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{0}};
+ const auto rhs2 = IODesc{ModelIndex{2}, SubgraphIndex{0}, IOIndex{1}};
+ edges.edges.insert({result0, lhs1});
+ edges.edges.insert({result0, lhs2});
+ edges.edges.insert({result1, rhs2});
+
+ // One graph per model
+ for (size_t i = 0; i < 3; ++i)
+ {
+ graphs.emplace_back(std::make_shared<Graph>());
+ }
+ Shape shape{1, 2, 2, 1};
+
+ // Model0's add operands (result0 <= lhs0 + rhs0)
+ // types[i] is the operand data type used by Model i
+ DataType types[3] = {DataType::FLOAT32, DataType::QUANT_UINT8_ASYMM, DataType::FLOAT32};
+ auto operand_lhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+ auto operand_rhs0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+ auto operand_result0 = graphs[0]->addOperand(shape, TypeInfo{types[0]});
+
+ // Model0's add operation
+ operation::BinaryArithmetic::Param param0;
+ param0.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param0.activation = Activation::NONE;
+ auto input_set0 = OperandIndexSequence{operand_lhs0, operand_rhs0};
+ auto output_set0 = OperandIndexSequence{operand_result0};
+ graphs[0]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set0, output_set0, param0));
+
+ // Model0's inputs/outputs
+ graphs[0]->addInput(operand_lhs0);
+ graphs[0]->addInput(operand_rhs0);
+ graphs[0]->addOutput(operand_result0);
+ graphs[0]->verify();
+
+ // Model1's add operands (result1 <= Model0 result + rhs1)
+ // The quantized values below encode the float values {3, 1, -1, 5}
+ // with scale 1 and zero point 128:
+ // static float rhs1_data[4] = {3, 1, -1, 5};
+ static uint8_t rhs1_data[4] = {131, 129, 127, 133};
+ const float scale = 1;
+ const int32_t zero_point = 128;
+ auto operand_lhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ auto operand_rhs1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ auto operand_result1 = graphs[1]->addOperand(shape, TypeInfo{types[1], scale, zero_point});
+ // Attach the constant data to rhs1
+ graphs[1]
+ ->operands()
+ .at(operand_rhs1)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs1_data), 4));
+
+ // Model1's add operation
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs1, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ graphs[1]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+
+ // Model1's inputs/outputs (rhs1 is constant, so lhs1 is the only input)
+ graphs[1]->addInput(operand_lhs1);
+ graphs[1]->addOutput(operand_result1);
+ graphs[1]->verify();
+
+ // Model2's add operands (result2 <= Model0 result + Model1 result)
+ auto operand_lhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+ auto operand_rhs2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+ auto operand_result2 = graphs[2]->addOperand(shape, TypeInfo{types[2]});
+
+ // Model2's add operation
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_lhs2, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ graphs[2]->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+
+ // Model2's inputs/outputs
+ graphs[2]->addInput(operand_lhs2);
+ graphs[2]->addInput(operand_rhs2);
+ graphs[2]->addOutput(operand_result2);
+ graphs[2]->verify();
+
+ // Compile
+ compile();
+ }
+
+public:
+ // Build a new NN package from `graphs` and `edges`, create fresh compiler options for each
+ // model, and compile. Replaces `coptions` and `artifact`; can be called repeatedly.
+ void compile()
+ {
+ auto nnpkg = std::make_shared<onert::ir::NNPkg>();
+ coptions.clear();
+ for (uint16_t i = 0; i < graphs.size(); ++i)
+ {
+ coptions.emplace_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+
+ // Each model holds exactly one subgraph (index 0)
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(SubgraphIndex{0}, graphs[i]);
+
+ nnpkg->push(onert::ir::ModelIndex{i}, std::move(model));
+ }
+ for (const auto &pkg_input : edges.pkg_inputs)
+ {
+ nnpkg->addInput(pkg_input);
+ }
+ for (const auto &pkg_output : edges.pkg_outputs)
+ {
+ nnpkg->addOutput(pkg_output);
+ }
+ for (const auto &edge : edges.edges)
+ {
+ nnpkg->addEdge(edge.from, edge.to);
+ }
+ auto compiler = onert::compiler::CompilerFactory::get().create(nnpkg, coptions);
+ // Drop the local reference to the package before compiling
+ nnpkg.reset();
+ artifact = compiler->compile();
+ }
+
+public:
+ // One graph per model, indexed by model index
+ std::vector<std::shared_ptr<Graph>> graphs;
+ // Compiler options, one per model; rebuilt on every compile()
+ std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> coptions;
+ // Result of the most recent compile()
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
+ // Package inputs/outputs and model-to-model edges
+ ModelEdges edges;
+};
+
TEST(ExecInstance, simple)
{
auto mockup = CompiledMockUpModel();
{
public:
Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
- std::shared_ptr<onert::exec::Executors> &executors)
+ std::shared_ptr<onert::exec::IExecutors> &executors)
: _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
{
// DO NOTHING
const float (&_input1)[4];
const float (&_input2)[4];
float (&_output)[4];
- std::shared_ptr<onert::exec::Executors> &_executors;
+ std::shared_ptr<onert::exec::IExecutors> &_executors;
};
// Support multi-thread execution
}
}
+// Single synchronous run of the compiled multi-model package
+TEST(ExecInstance, multi_model_simple)
+{
+  auto mockup = CompiledMockUpMultiModel();
+  auto executors = mockup.artifact->_executors;
+
+  const IOIndex lhs_index{0};
+  const IOIndex rhs_index{1};
+  const IOIndex result_index{0};
+
+  const float lhs_data[4] = {1, 0, -1, -2};
+  const float rhs_data[4] = {1, -3, 2, -4};
+  const float expected[4] = {7, -5, 1, -7};
+  float actual[4] = {};
+
+  onert::exec::Execution execution{executors};
+
+  // Each buffer holds four floats, i.e. 16 bytes
+  execution.setInput(lhs_index, static_cast<const void *>(lhs_data), sizeof(lhs_data));
+  execution.setInput(rhs_index, static_cast<const void *>(rhs_data), sizeof(rhs_data));
+  execution.setOutput(result_index, static_cast<void *>(actual), sizeof(actual));
+  execution.execute();
+
+  for (size_t idx = 0; idx < 4; ++idx)
+  {
+    EXPECT_EQ(actual[idx], expected[idx]);
+  }
+}
+
+// Two executions built from two separate compilations of the same mock-up
+TEST(ExecInstance, multi_model_twoCompile)
+{
+  auto mockup = CompiledMockUpMultiModel();
+  // Keep a handle to the executors of the first compilation
+  auto first_executors = mockup.artifact->_executors;
+  onert::exec::Execution first_run{first_executors};
+
+  const IOIndex lhs_index{0};
+  const IOIndex rhs_index{1};
+  const IOIndex result_index{0};
+
+  const float first_lhs[4] = {1, 0, -1, -2};
+  const float first_rhs[4] = {1, -3, 2, -4};
+  const float first_expected[4] = {7, -5, 1, -7};
+  float first_actual[4] = {};
+
+  first_run.setInput(lhs_index, static_cast<const void *>(first_lhs), sizeof(first_lhs));
+  first_run.setInput(rhs_index, static_cast<const void *>(first_rhs), sizeof(first_rhs));
+  first_run.setOutput(result_index, static_cast<void *>(first_actual), sizeof(first_actual));
+
+  // Make new executor: compile again
+  mockup.compile();
+  onert::exec::Execution second_run{mockup.artifact->_executors};
+
+  const float second_lhs[4] = {2, 1, -2, 0};
+  const float second_rhs[4] = {-3, 3, 1, 2};
+  const float second_expected[4] = {1, 9, -3, 9};
+  float second_actual[4] = {};
+
+  second_run.setInput(lhs_index, static_cast<const void *>(second_lhs), sizeof(second_lhs));
+  second_run.setInput(rhs_index, static_cast<const void *>(second_rhs), sizeof(second_rhs));
+  second_run.setOutput(result_index, static_cast<void *>(second_actual), sizeof(second_actual));
+
+  first_run.execute();
+  second_run.execute();
+
+  for (size_t idx = 0; idx < 4; ++idx)
+  {
+    EXPECT_EQ(first_actual[idx], first_expected[idx]);
+    EXPECT_EQ(second_actual[idx], second_expected[idx]);
+  }
+}
+
+// Two execution instances are set up on the same executors, then run one after another
+TEST(ExecInstance, multi_model_twoExecution)
+{
+  auto mockup = CompiledMockUpMultiModel();
+  auto executors = mockup.artifact->_executors;
+
+  const IOIndex lhs_index{0};
+  const IOIndex rhs_index{1};
+  const IOIndex result_index{0};
+
+  const float first_lhs[4] = {1, 0, -1, -2};
+  const float first_rhs[4] = {1, -3, 2, -4};
+  const float first_expected[4] = {7, -5, 1, -7};
+  const float second_expected[4] = {1, 9, -3, 9};
+  float first_actual[4] = {};
+
+  onert::exec::Execution first_run{executors};
+  first_run.setInput(lhs_index, static_cast<const void *>(first_lhs), sizeof(first_lhs));
+  first_run.setInput(rhs_index, static_cast<const void *>(first_rhs), sizeof(first_rhs));
+  first_run.setOutput(result_index, static_cast<void *>(first_actual), sizeof(first_actual));
+
+  const float second_lhs[4] = {2, 1, -2, 0};
+  const float second_rhs[4] = {-3, 3, 1, 2};
+  float second_actual[4] = {};
+
+  // Make new execution
+  onert::exec::Execution second_run{executors};
+  second_run.setInput(lhs_index, static_cast<const void *>(second_lhs), sizeof(second_lhs));
+  second_run.setInput(rhs_index, static_cast<const void *>(second_rhs), sizeof(second_rhs));
+  second_run.setOutput(result_index, static_cast<void *>(second_actual), sizeof(second_actual));
+
+  // Each instance is executed twice before the results are compared
+  first_run.execute();
+  first_run.execute();
+  second_run.execute();
+  second_run.execute();
+
+  for (size_t idx = 0; idx < 4; ++idx)
+  {
+    EXPECT_EQ(first_actual[idx], first_expected[idx]);
+    EXPECT_EQ(second_actual[idx], second_expected[idx]);
+  }
+}
+
+// Multi-model is not thread-safe yet
+
+// Asynchronous execution: start the run, then wait for it to finish
+TEST(ExecInstance, multi_model_async)
+{
+  auto mockup = CompiledMockUpMultiModel();
+  auto executors = mockup.artifact->_executors;
+
+  const IOIndex lhs_index{0};
+  const IOIndex rhs_index{1};
+  const IOIndex result_index{0};
+
+  const float lhs_data[4] = {1, 0, -1, -2};
+  const float rhs_data[4] = {1, -3, 2, -4};
+  const float expected[4] = {7, -5, 1, -7};
+  float actual[4] = {};
+
+  onert::exec::Execution execution{executors};
+
+  execution.setInput(lhs_index, static_cast<const void *>(lhs_data), sizeof(lhs_data));
+  execution.setInput(rhs_index, static_cast<const void *>(rhs_data), sizeof(rhs_data));
+  execution.setOutput(result_index, static_cast<void *>(actual), sizeof(actual));
+  execution.startExecute();
+  execution.waitFinish();
+
+  for (size_t idx = 0; idx < 4; ++idx)
+  {
+    EXPECT_EQ(actual[idx], expected[idx]);
+  }
+}
+
+// User buffers are uint8 (scale 0.1, zero point 128); they are registered with an explicit
+// QUANT_UINT8_ASYMM type so the runtime converts at the package inputs and output
+TEST(ExecInstance, multi_model_dequant_input_quant_output)
+{
+  auto mockup = CompiledMockUpMultiModel();
+  auto executors = mockup.artifact->_executors;
+
+  const IOIndex lhs_index{0};
+  const IOIndex rhs_index{1};
+  const IOIndex result_index{0};
+
+  const uint8_t lhs_data[4] = {138, 128, 118, 108}; // {1, 0, -1, -2}
+  const uint8_t rhs_data[4] = {138, 98, 148, 88};   // {1, -3, 2, -4}
+  const uint8_t expected[4] = {198, 78, 138, 58};   // {7, -5, 1, -7}
+  uint8_t actual[4] = {};
+
+  const float scale = 0.1;
+  const int32_t zero_point = 128;
+  const auto layout = onert::ir::Layout::NHWC;
+
+  onert::exec::Execution execution{executors};
+
+  onert::ir::TypeInfo quant_type{onert::ir::DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+  execution.setInput(lhs_index, quant_type, execution.getInputShape(lhs_index),
+                     static_cast<const void *>(lhs_data), sizeof(lhs_data), layout);
+  execution.setInput(rhs_index, quant_type, execution.getInputShape(rhs_index),
+                     static_cast<const void *>(rhs_data), sizeof(rhs_data), layout);
+  execution.setOutput(result_index, quant_type, execution.getOutputShape(result_index),
+                      static_cast<void *>(actual), sizeof(actual), layout);
+  execution.execute();
+
+  for (size_t idx = 0; idx < 4; ++idx)
+  {
+    EXPECT_EQ(actual[idx], expected[idx]);
+  }
+}
+
+// TODO Add a unit test: multi_model_quant_input_dequant_output
+
} // namespace
void ExecutionObservee::notifySubgraphBegin(ir::SubgraphIndex ind)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
o->handleSubgraphBegin(ind);
}
void ExecutionObservee::notifySubgraphEnd(ir::SubgraphIndex ind)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
o->handleSubgraphEnd(ind);
}
void ExecutionObservee::notifyJobBegin(IExecutor *executor, ir::SubgraphIndex subg_ind,
ir::OperationIndex op_ind, const backend::Backend *backend)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
o->handleJobBegin(executor, subg_ind, op_ind, backend);
}
void ExecutionObservee::notifyJobEnd(IExecutor *executor, ir::SubgraphIndex subg_ind,
ir::OperationIndex op_ind, const backend::Backend *backend)
{
- for (auto &o : _observers)
+ for (auto &&o : _observers)
{
o->handleJobEnd(executor, subg_ind, op_ind, backend);
}
#include "../util/EventRecorder.h"
#include "../util/EventWriter.h"
-#include "exec/Executors.h"
+#include "exec/IExecutor.h"
#include "ir/Index.h"
#include "ir/Operation.h"
#include "util/ITimer.h"
backend::BackendContexts &&backend_contexts,
const compiler::TensorRegistries &tensor_regs,
const util::TracingCtx *tracing_ctx)
- : _lowered_graph{std::move(lowered_graph)}, _backend_contexts{std::move(backend_contexts)},
- _graph{_lowered_graph->graph()}, _parent_graph{_lowered_graph->parent_graph()}, _mutex(),
+ : _lowered_graph{std::move(lowered_graph)},
+ _backend_contexts{std::move(backend_contexts)}, _graph{_lowered_graph->graph()}, _mutex(),
_tracing_ctx(tracing_ctx)
{
auto build_tensor_list = [&](const auto &ind_seq, auto &tensors) {
{
tensor->set_dynamic();
tensor->setShape(input_shape->second);
+ /*
+ * Changes tensor shape and allocate memory since its shape was changed
+ * perhaps by nnfw_set_input_tensorinfo()
+ *
+ * Cases are:
+ * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
+ * (a) (b)
+ *
+ * at (a), operand is static, tensor is static - memory dealloc is not needed
+ * (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
+ * at (b), operand is static, tensor is dynamic - memory dealloc is needed
+ *
+ * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
+ * (a) (b)
+ *
+ * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
+ * since it has not been allocated yet
+ * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
+ */
+ tensor->applyShape(input_shape->second);
}
-
- handleDynamicInputTensor(ir::IOIndex{i}, desc);
}
assert(_output_tensors.size() == desc.outputs.size());
}
}
-/**
- * @brief Changes tensor shape and allocate memory
- * if input shape was changed by nnfw_set_input_tensorinfo()
- *
- * @note Cases are:
- * 1) static operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
- * (a) (b)
- *
- * at (a), operand is static, tensor is static - memory dealloc is not needed
- * (DynamicTensorManager cannot dealloc memory allocated by StaticTensorManager)
- * at (b), operand is static, tensor is dynamic - memory dealloc is needed
- *
- * 2) dynamic operand -> nnfw_set_input_tensorinfo() -> execute() -> execute()
- * (a) (b)
- *
- * at (a), operand is dynamic, tensor is dynamic - memory dealloc is not needed
- * since it has not been allocated yet
- * at (b), operand is dynamic, tensor is dynamic - memory dealloc is needed
- */
-void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
-{
- auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
- if (shape_sig_found != desc.dynamic_input_shapes.end())
- {
- auto changed_input_shape = shape_sig_found->second;
- _input_tensors[io_ind.value()]->applyShape(changed_input_shape);
- }
-}
-
bool ExecutorBase::hasDynamicInput()
{
- for (auto &tensor : _input_tensors)
+ for (auto &&tensor : _input_tensors)
{
if (tensor->is_dynamic())
return true;
virtual ~ExecutorBase() = default;
- const ir::Graph &graph() final { return _graph; }
-
- const ir::Graph &parent_graph() final { return _parent_graph; }
+ const ir::Graph &graph() const final { return _graph; }
void execute(const IODescription &desc) final;
void addObserver(std::unique_ptr<IExecutionObserver> ref) { _subject.add(std::move(ref)); };
+ const std::vector<backend::builtin::IOTensor *> &getInputTensors() const override
+ {
+ return _input_tensors; // read-only access to this executor's input IOTensor pointers
+ }
+
const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const override
{
return _output_tensors;
std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
backend::BackendContexts _backend_contexts;
const ir::Graph &_graph;
- const ir::Graph &_parent_graph;
std::vector<backend::builtin::IOTensor *> _input_tensors;
std::vector<backend::builtin::IOTensor *> _output_tensors;
std::mutex _mutex;
const util::TracingCtx *_tracing_ctx;
-
-private:
- void handleDynamicInputTensor(ir::IOIndex input_index, const IODescription &desc);
};
} // namespace exec
* limitations under the License.
*/
-#include "exec/Executors.h"
+#include "Executors.h"
-namespace onert
-{
-namespace exec
+#include "../backend/builtin/IOTensor.h"
+
+namespace
{
-uint32_t Executors::inputSize() const
+using namespace onert;
+
+// Look up the package input described by (model_index, subg_index, io_index).
+// Returns the position of the first matching entry in pkg_inputs, or -1 if none matches.
+int32_t find_input_index(const std::vector<ir::IODesc> &pkg_inputs,
+                         const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+                         const ir::IOIndex &io_index)
{
- return _model_edges ? _model_edges->pkg_inputs.size()
- : _executors.at(ir::SubgraphIndex{0})->graph().getInputs().size();
+  int32_t position = 0;
+  for (const auto &candidate : pkg_inputs)
+  {
+    const bool matches = (std::get<ir::ModelIndex>(candidate) == model_index) &&
+                         (std::get<ir::SubgraphIndex>(candidate) == subg_index) &&
+                         (std::get<ir::IOIndex>(candidate) == io_index);
+    if (matches)
+      return position;
+    ++position;
+  }
+  return -1;
}
-uint32_t Executors::outputSize() const
+// Look up the package output described by (model_index, subg_index, io_index).
+// Returns the position of the first matching entry in pkg_outputs, or -1 if none matches.
+int32_t find_output_index(const std::vector<ir::IODesc> &pkg_outputs,
+                          const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+                          const ir::IOIndex &io_index)
{
- return _model_edges ? _model_edges->pkg_outputs.size()
- : _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().size();
+  int32_t position = 0;
+  for (const auto &candidate : pkg_outputs)
+  {
+    const bool matches = (std::get<ir::ModelIndex>(candidate) == model_index) &&
+                         (std::get<ir::SubgraphIndex>(candidate) == subg_index) &&
+                         (std::get<ir::IOIndex>(candidate) == io_index);
+    if (matches)
+      return position;
+    ++position;
+  }
+  return -1;
}
-const ir::OperandInfo Executors::inputInfo(const ir::IOIndex &index)
+} // namespace
+
+namespace onert
+{
+namespace exec
+{
+
+// IOTensor that owns its own byte buffer. The buffer is created by allocate_buffer()
+// and released again once decrease_ref() brings the reference count down to zero.
+class Executors::EdgeTensor : public backend::builtin::IOTensor
{
- if (_model_edges)
+public:
+  // Starts without a buffer and with a reference count of zero
+  EdgeTensor(const ir::OperandInfo &info, ir::Layout layout)
+    : backend::builtin::IOTensor(info, layout), _buffer{}, _ref_count{0}
{
- // Assume that each model may have only one subgraph
- // TODO handle general case
- const auto desc = _model_edges->pkg_inputs[index.value()];
- const auto model_idx = std::get<0>(desc);
- const auto executor_idx = ir::SubgraphIndex{model_idx.value()};
- const auto input_index = _executors.at(executor_idx)->graph().getInputs().at(std::get<2>(desc));
- return _executors.at(executor_idx)->graph().operands().at(input_index).info();
}
+  ~EdgeTensor() = default;
- const auto input_index = _executors.at(ir::SubgraphIndex{0})->graph().getInputs().at(index);
- return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(input_index).info();
-}
+  // Allocate a buffer sized from the original operand info, set the reference count
+  // to one and register the buffer as this tensor's user buffer
+  void allocate_buffer()
+  {
+    const auto byte_count = orig_info().total_size();
+    _buffer = std::make_unique<uint8_t[]>(byte_count);
+    _ref_count = 1;
-const ir::OperandInfo Executors::outputInfo(const ir::IOIndex &index)
-{
- if (_model_edges)
+    // NOTE Executor's inputs/outputs are always IPortableTensor. If backend of inputs/outputs
+    //      is using tensor that does not inherit IPortableTensor, Permute operation is added
+    //      and all inputs/outputs become IPortableTensor at compile stage.
+    //      This allows user's buffers to be set to inputs/outputs of executors.
+    setUserTensor(_buffer.get(), byte_count);
+  }
+
+  // Register one more user of the buffer
+  void increase_ref()
+  {
+    ++_ref_count;
+  }
+
+  // Drop one user of the buffer; the buffer is freed when no user is left
+  void decrease_ref()
{
- // Assume that each model may have only one subgraph
- // TODO handle general case
- auto desc = _model_edges->pkg_outputs[index.value()];
- auto model_idx = std::get<0>(desc);
- auto executor_idx = ir::SubgraphIndex{model_idx.value()};
- auto output_index = _executors.at(executor_idx)->graph().getOutputs().at(std::get<2>(desc));
- return _executors.at(executor_idx)->graph().operands().at(output_index).info();
+    assert(_ref_count > 0);
+    --_ref_count;
+    if (_ref_count != 0)
+      return;
+
+    // Last user gone: free the storage and detach it from the tensor
+    _buffer.reset();
+    setUserTensor(nullptr, orig_info().total_size());
}
- auto output_index = _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(index);
- return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(output_index).info();
+private:
+  std::unique_ptr<uint8_t[]> _buffer; // storage owned by this tensor, empty until allocated
+  int32_t _ref_count;                 // number of users currently holding the buffer
+};
+
+// Register `exec` under the (model_index, subg_index) key
+void Executors::emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+                        std::unique_ptr<IExecutor> exec)
+{
+  auto key = std::make_pair(model_index, subg_index);
+  _executors.emplace(std::move(key), std::move(exec));
}
-void Executors::execute(const IODescription &desc)
+// Non-owning pointer to the executor registered under (model_index, subg_index);
+// the lookup goes through the container's at()
+IExecutor *Executors::at(const ir::ModelIndex &model_index,
+                         const ir::SubgraphIndex &subg_index) const
+{
+  const auto key = std::make_pair(model_index, subg_index);
+  return _executors.at(key).get();
+}
+
+// Number of package-level inputs
+uint32_t Executors::inputSize() const
+{
+  const auto &pkg_inputs = _model_edges->pkg_inputs;
+  return pkg_inputs.size();
+}
+
+// Number of package-level outputs
+uint32_t Executors::outputSize() const
+{
+  const auto &pkg_outputs = _model_edges->pkg_outputs;
+  return pkg_outputs.size();
+}
+
+// Original operand info of package input `index`, read from the input tensor of the
+// executor that this package input is mapped to
+const ir::OperandInfo &Executors::inputInfo(const ir::IOIndex &index) const
{
- if (_model_edges)
- return executeEntries(desc);
+  const auto pkg_input = _model_edges->pkg_inputs[index.value()];
+  const auto owner =
+    at(std::get<ir::ModelIndex>(pkg_input), std::get<ir::SubgraphIndex>(pkg_input));
+  const auto tensor_pos = std::get<ir::IOIndex>(pkg_input).value();
+  return owner->getInputTensors().at(tensor_pos)->orig_info();
+}
- _executors.at(ir::SubgraphIndex{0})->execute(desc);
+// Original operand info of package output `index`, read from the output tensor of the
+// executor that this package output is mapped to
+const ir::OperandInfo &Executors::outputInfo(const ir::IOIndex &index) const
+{
+  const auto pkg_output = _model_edges->pkg_outputs[index.value()];
+  const auto owner =
+    at(std::get<ir::ModelIndex>(pkg_output), std::get<ir::SubgraphIndex>(pkg_output));
+  const auto tensor_pos = std::get<ir::IOIndex>(pkg_output).value();
+  return owner->getOutputTensors().at(tensor_pos)->orig_info();
}
-void Executors::executeEntries(const IODescription &desc)
+// Validate that the package wiring is one this executor set can run.
+// Throws std::runtime_error when
+//  - an edge connects a model to itself,
+//  - an edge 'm1:s1:o1 -> m2:s2:o2' does not satisfy m1 < m2, s1 == 0 and s2 == 0,
+//  - an input of the 1st model (0:0:x) is not listed as a package input, or
+//  - a package output is also used as the `from` side of an edge.
+void Executors::checkSupportedMultimodel() const
{
- // Assume 2 executors only
- // Assume that each model may have only one subgraph
- // TODO Support general case
- if (_executors.size() != 2)
- throw std::runtime_error{"NYI: Multi model execution for this package is not supported yet"};
+  // If package includes no-connection model, model_count is less than real model count in package.
+  // Then this method will throw exception based on model index
+  //  1st model: input assumption
+  //  Otherwise: edges assumption
- // Assume all edges are 0:0:x -> 1:0:x
+  // Assumption: edges
+  // m1 < m2, s1 == 0 and s2 == 0 if edge 'm1:s1:o1 -> m2:s2:o2'
for (auto edge : _model_edges->edges)
{
- if ((std::get<ir::ModelIndex>(edge.from) != ir::ModelIndex{0}) ||
- (std::get<ir::ModelIndex>(edge.to) != ir::ModelIndex{1}) ||
- (std::get<ir::SubgraphIndex>(edge.from) != ir::SubgraphIndex{0}) ||
- (std::get<ir::SubgraphIndex>(edge.to) != ir::SubgraphIndex{0}) ||
- (std::get<ir::IOIndex>(edge.from) != std::get<ir::IOIndex>(edge.to)))
- throw std::runtime_error{"NYI: Multi model execution for this edge is not supported yet"};
+    auto const model_from = std::get<ir::ModelIndex>(edge.from);
+    auto const model_to = std::get<ir::ModelIndex>(edge.to);
+    auto const subg_from = std::get<ir::SubgraphIndex>(edge.from);
+    auto const subg_to = std::get<ir::SubgraphIndex>(edge.to);
+
+    // An edge from a model to itself is never valid
+    if (model_from.value() == model_to.value())
+    {
+      throw std::runtime_error{"Multi model's edge set has invalid edge"};
+    }
+
+    // Backward edges and edges touching non-primary subgraphs are not implemented
+    if ((model_from.value() > model_to.value()) || (subg_from != ir::SubgraphIndex{0}) ||
+        (subg_to != ir::SubgraphIndex{0}))
+      throw std::runtime_error{"NYI: Multi model execution for this edge set is not supported yet"};
}
- // Assume all package inputs are 0:0:x
- for (uint32_t i = 0; i < _model_edges->pkg_inputs.size(); i++)
+  // Assumption: package inputs
+  // All 1st model inputs come from package input if always m1 < m2
{
- auto input = _model_edges->pkg_inputs[i];
- if ((std::get<ir::ModelIndex>(input) != ir::ModelIndex{0}) ||
- (std::get<ir::SubgraphIndex>(input) != ir::SubgraphIndex{0}) ||
- (std::get<ir::IOIndex>(input) != ir::IOIndex{i}))
+    auto first_executor = at(ir::ModelIndex{0}, ir::SubgraphIndex{0});
+    // True when some package input is exactly 0:0:input_index.
+    // All three components must match at once: joining them with `||` would accept
+    // any package input that merely belongs to model 0 or to subgraph 0, and the
+    // check below could then never report a missing input.
+    auto search_first_model = [&](const ir::IOIndex &input_index) {
+      for (const auto &input : _model_edges->pkg_inputs)
+      {
+        if ((std::get<ir::ModelIndex>(input) == ir::ModelIndex{0}) &&
+            (std::get<ir::SubgraphIndex>(input) == ir::SubgraphIndex{0}) &&
+            (std::get<ir::IOIndex>(input) == input_index))
+          return true;
+      }
+
+      return false;
+    };
+
+    for (uint32_t i = 0; i < first_executor->getInputTensors().size(); i++)
{
- throw std::runtime_error{"NYI: Support package input to 1st model with same order"};
+      if (!search_first_model(ir::IOIndex{i}))
+        throw std::runtime_error{"Cannot find 1st model's input buffer"};
}
}
- // Assume all package outputs are 1:0:x
- for (uint32_t i = 0; i < _model_edges->pkg_outputs.size(); i++)
+  // Check whether nnpkg outputs and Edge `from` are duplicated
+  for (const auto &edge : _model_edges->edges)
{
- auto output = _model_edges->pkg_outputs[i];
- if ((std::get<ir::ModelIndex>(output) != ir::ModelIndex{1}) ||
- (std::get<ir::SubgraphIndex>(output) != ir::SubgraphIndex{0}) ||
- (std::get<ir::IOIndex>(output) != ir::IOIndex{i}))
+    if (std::find(_model_edges->pkg_outputs.begin(), _model_edges->pkg_outputs.end(), edge.from) !=
+        _model_edges->pkg_outputs.end())
{
- throw std::runtime_error{"NYI: Support package output from 2nd model with same order"};
+      throw std::runtime_error{"Multi model execution does not support duplicating nnpkg outputs "
+                               "with `from` of edges yet"};
}
}
+}
+
+// Build, only on the first call, the tensors and PermuteLayers used on model-to-model
+// edges: one EdgeTensor per edge source, plus one extra EdgeTensor for every edge
+// destination whose data type differs from that of its source.
+void Executors::createEdgeQuantLayers()
+{
+  // Already built by an earlier call
+  if (_is_created_edge_quant_layers)
+    return;
- const auto &executor1 = _executors.at(ir::SubgraphIndex{0});
- const auto &graph1 = executor1->graph();
- const auto &executor2 = _executors.at(ir::SubgraphIndex{1});
- const auto &graph2 = executor2->graph();
+  // Create EdgeTensor for edges between executors
+  for (const auto &edge_entry : _edge_map)
+  {
+    const auto &src_desc = edge_entry.first;
+    const auto src_executor =
+      _executors.at({std::get<ir::ModelIndex>(src_desc), std::get<ir::SubgraphIndex>(src_desc)})
+        .get();
+    const auto src_tensor =
+      src_executor->getOutputTensors().at(std::get<ir::IOIndex>(src_desc).value());
+
+    // The edge tensor mirrors the info and layout of the producing output tensor
+    const auto &src_info = src_tensor->orig_info();
+    const auto src_layout = src_tensor->orig_layout();
+    _edge_tensors[src_desc] = std::make_unique<EdgeTensor>(src_info, src_layout);
+  }
- if ((graph1.getInputs().size() != _model_edges->pkg_inputs.size()) ||
- (graph2.getOutputs().size() != _model_edges->pkg_outputs.size()) ||
- (graph1.getOutputs().size() != graph2.getInputs().size()) ||
- (graph1.getOutputs().size() != _model_edges->edges.size()))
+  // Append type-aware quantization layer for edges between executors
+  for (const auto &executor_entry : _executors)
{
- throw std::runtime_error{"NYI: Unsupported model edge pattern"};
+    const auto &model_index = executor_entry.first.first;
+    const auto &subg_index = executor_entry.first.second;
+
+    std::vector<backend::ITensor *> layer_inputs;
+    std::vector<backend::ITensor *> layer_outputs;
+    for (const auto &edge_entry : _edge_map)
+    {
+      const auto &src_desc = edge_entry.first;
+      // Only edges that start at the current executor are handled in this pass
+      if (std::get<ir::ModelIndex>(src_desc) != model_index ||
+          std::get<ir::SubgraphIndex>(src_desc) != subg_index)
+        continue;
+
+      const auto src_tensor = _edge_tensors[src_desc].get();
+      for (const auto &dst_desc : edge_entry.second)
+      {
+        const auto dst_executor =
+          _executors.at({std::get<ir::ModelIndex>(dst_desc), std::get<ir::SubgraphIndex>(dst_desc)})
+            .get();
+        const auto dst_tensor =
+          dst_executor->getInputTensors().at(std::get<ir::IOIndex>(dst_desc).value());
+
+        // TODO Unify tensors with the same `from` tensor and same type
+        // Same data type on both ends: no converting tensor is required
+        if (src_tensor->data_type() == dst_tensor->data_type())
+          continue;
+
+        assert(layer_inputs.size() == layer_outputs.size());
+        layer_inputs.emplace_back(src_tensor);
+
+        auto converted =
+          std::make_unique<EdgeTensor>(dst_tensor->orig_info(), dst_tensor->orig_layout());
+        layer_outputs.emplace_back(converted.get());
+        _edge_quant_tensors[dst_desc] = std::move(converted);
+      }
+    }
+
+    // One layer per executor, created even when it has nothing to convert
+    auto layer = std::make_unique<PermuteLayer>(layer_inputs, layer_outputs);
+    layer->prepare();
+    _edge_quant_layers[{model_index, subg_index}] = std::move(layer);
}
- // Prepare buffer
- // Assume buffer layout is NHWC
- std::vector<std::unique_ptr<uint8_t[]>> bufs(_model_edges->edges.size());
- std::vector<const ir::OperandInfo *> buf_infos(_model_edges->edges.size());
- const auto layout = ir::Layout::NHWC;
+  _is_created_edge_quant_layers = true;
+}
- for (uint32_t i = 0; i < graph1.getOutputs().size(); i++)
+// Create one IOTensor per package input and per package output, built from the info
+// and layout stored in the matching entry of `desc`. Existing entries are replaced.
+void Executors::CreatePkgIOTensors(const IODescription &desc)
+{
+  for (const auto &pkg_input : _model_edges->pkg_inputs)
{
- const auto buf_index =
- _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(ir::IOIndex{i});
- buf_infos[i] = &_executors.at(ir::SubgraphIndex{0})->graph().operands().at(buf_index).info();
- const auto buf_size = buf_infos[i]->total_size();
- bufs[i] = std::make_unique<uint8_t[]>(buf_size);
+    // Create IOTensor for nnpkg inputs
+    const auto desc_pos = find_input_index(
+      _model_edges->pkg_inputs, std::get<ir::ModelIndex>(pkg_input),
+      std::get<ir::SubgraphIndex>(pkg_input), std::get<ir::IOIndex>(pkg_input));
+    const auto user_input = desc.inputs[desc_pos].get();
+    _pkg_input_tensors[pkg_input] =
+      std::make_unique<backend::builtin::IOTensor>(user_input->info, user_input->layout);
}
- // 1st executor
+  for (const auto &pkg_output : _model_edges->pkg_outputs)
{
- IODescription desc1;
- const auto input_size = graph1.getInputs().size();
- const auto output_size = graph1.getOutputs().size();
- desc1.inputs.resize(input_size);
- desc1.outputs.resize(output_size);
- for (uint32_t i = 0; i < input_size; i++)
- desc1.inputs[i] = std::make_unique<InputDesc>(*desc.inputs[i].get());
- for (uint32_t i = 0; i < output_size; i++)
- desc1.outputs[i] = std::make_unique<OutputDesc>(*buf_infos[i], bufs[i].get(),
- buf_infos[i]->total_size(), layout);
+    // Create IOTensor for nnpkg outputs
+    const auto desc_pos = find_output_index(
+      _model_edges->pkg_outputs, std::get<ir::ModelIndex>(pkg_output),
+      std::get<ir::SubgraphIndex>(pkg_output), std::get<ir::IOIndex>(pkg_output));
+    const auto user_output = desc.outputs[desc_pos].get();
+    _pkg_output_tensors[pkg_output] =
+      std::make_unique<backend::builtin::IOTensor>(user_output->info, user_output->layout);
+  }
+}
- executor1->execute(desc1);
+// For every executor, create the EdgeTensors and PermuteLayers used where the data type
+// given in `desc` for a package input/output differs from the data type of the
+// executor's own tensor. A layer pair is stored per executor even if nothing differs.
+void Executors::createPkgIOQuantLayers(const IODescription &desc)
+{
+  // Append type-aware quantization layer for nnpkg inputs/outputs between executors
+  for (const auto &entry : _executors)
+  {
+    const auto &model_index = entry.first.first;
+    const auto &subg_index = entry.first.second;
+    const auto executor = entry.second.get();
+
+    // ---- package inputs that feed the current executor ----
+    std::vector<backend::ITensor *> input_srcs;
+    std::vector<backend::ITensor *> input_dsts;
+    for (const auto &pkg_input : _model_edges->pkg_inputs)
+    {
+      if (std::get<ir::ModelIndex>(pkg_input) != model_index ||
+          std::get<ir::SubgraphIndex>(pkg_input) != subg_index)
+        continue;
+
+      const auto &io_index = std::get<ir::IOIndex>(pkg_input);
+      const auto desc_pos =
+        find_input_index(_model_edges->pkg_inputs, model_index, subg_index, io_index);
+      const auto user_input = desc.inputs[desc_pos].get();
+
+      const auto input_tensor = executor->getInputTensors().at(io_index.value());
+      const auto &orig_info = input_tensor->orig_info();
+      // Same type on both sides: the user tensor can be used as is
+      if (user_input->info.typeInfo().type() == orig_info.typeInfo().type())
+        continue;
+
+      // Create EdgeTensor for nnpkg input if type is different
+      _pkg_input_quant_tensors[pkg_input] =
+        std::make_unique<EdgeTensor>(orig_info, input_tensor->orig_layout());
+
+      // Append type-aware quantization layer's inputs/outputs
+      input_srcs.emplace_back(_pkg_input_tensors[pkg_input].get());
+      input_dsts.emplace_back(_pkg_input_quant_tensors[pkg_input].get());
+    }
+
+    // Create type-aware quantization layer for nnpkg inputs
+    auto pkg_input_layer = std::make_unique<PermuteLayer>(input_srcs, input_dsts);
+    pkg_input_layer->prepare();
+    _pkg_input_quant_layers[{model_index, subg_index}] = std::move(pkg_input_layer);
+
+    // ---- package outputs produced by the current executor ----
+    std::vector<backend::ITensor *> output_srcs;
+    std::vector<backend::ITensor *> output_dsts;
+    for (const auto &pkg_output : _model_edges->pkg_outputs)
+    {
+      if (std::get<ir::ModelIndex>(pkg_output) != model_index ||
+          std::get<ir::SubgraphIndex>(pkg_output) != subg_index)
+        continue;
+
+      const auto &io_index = std::get<ir::IOIndex>(pkg_output);
+      const auto desc_pos =
+        find_output_index(_model_edges->pkg_outputs, model_index, subg_index, io_index);
+      const auto user_output = desc.outputs[desc_pos].get();
+
+      const auto output_tensor = executor->getOutputTensors().at(io_index.value());
+      const auto &orig_info = output_tensor->orig_info();
+      // Same type on both sides: the user tensor can be used as is
+      if (user_output->info.typeInfo().type() == orig_info.typeInfo().type())
+        continue;
+
+      // Create EdgeTensor for nnpkg output if type is different
+      _pkg_output_quant_tensors[pkg_output] =
+        std::make_unique<EdgeTensor>(orig_info, output_tensor->orig_layout());
+
+      // Append type-aware quantization layer's inputs/outputs
+      output_srcs.emplace_back(_pkg_output_quant_tensors[pkg_output].get());
+      output_dsts.emplace_back(_pkg_output_tensors[pkg_output].get());
+    }
+
+    // Create type-aware quantization layer for nnpkg outputs
+    auto pkg_output_layer = std::make_unique<PermuteLayer>(output_srcs, output_dsts);
+    pkg_output_layer->prepare();
+    _pkg_output_quant_layers[{model_index, subg_index}] = std::move(pkg_output_layer);
}
+}
+
+void Executors::execute(const IODescription &desc)
+{
+ // Check supported multi model package
+ checkSupportedMultimodel();
+
+ // TODO Move creating type-aware quantization layers for edges in compilation stage
+ createEdgeQuantLayers();
+
+ // TODO Create IOTensors only once and recreate them only if nnpkg info changes
+ CreatePkgIOTensors(desc);
+
+ // TODO Create type-aware quantization layers only once and recreate them only if type changes
+ createPkgIOQuantLayers(desc);
- // 2nd executor
+ // TODO Find better way to schedule order of executors
+ auto const model_count = modelCount();
+
+ auto find_from = [&](const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ const ir::IOIndex &io_index) {
+ for (const auto &edge : _model_edges->edges)
+ {
+ if ((std::get<ir::ModelIndex>(edge.to) == model_index) &&
+ (std::get<ir::SubgraphIndex>(edge.to) == subg_index) &&
+ (std::get<ir::IOIndex>(edge.to) == io_index))
+ return edge.from;
+ }
+
+ throw std::runtime_error{"Cannot find edge for model input"};
+ };
+
+ // Execute each model
+ // NOTE May be better to use vector instead of unordered_map for _executors
+ for (auto model_index = ir::ModelIndex{0}; model_index.value() < model_count; model_index++)
{
- IODescription desc2;
- const auto input_size = graph2.getInputs().size();
- const auto output_size = graph2.getOutputs().size();
- desc2.inputs.resize(input_size);
- desc2.outputs.resize(output_size);
+ // Find executor
+ auto executor = at(model_index, ir::SubgraphIndex{0});
+
+ // Set IOTensors
+ // TODO Set internal IOTensors only once
+ std::vector<backend::IPortableTensor *> inputs_inter;
+ std::vector<backend::IPortableTensor *> outputs_inter;
+ const auto &input_tensors = executor->getInputTensors();
+ const auto &output_tensors = executor->getOutputTensors();
+ auto const input_size = input_tensors.size();
+ auto const output_size = output_tensors.size();
+ inputs_inter.resize(input_size);
+ outputs_inter.resize(output_size);
+
+ // Set inputs of executor
+ // TODO Create layer to allocate/deallocate buffers of EdgeTensor for each executor
for (uint32_t i = 0; i < input_size; i++)
- desc2.inputs[i] = std::make_unique<InputDesc>(*buf_infos[i], bufs[i].get(),
- buf_infos[i]->total_size(), layout);
+ {
+ const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto input_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (input_pkg_index != -1)
+ {
+ // Allocate type-aware quantization tensors for nnpkg inputs and set internal tensors
+ if (_pkg_input_quant_tensors.find(input_io_desc) != _pkg_input_quant_tensors.end())
+ {
+ _pkg_input_quant_tensors[input_io_desc]->allocate_buffer();
+
+ inputs_inter[i] = _pkg_input_quant_tensors[input_io_desc].get();
+ }
+ else
+ {
+ inputs_inter[i] = _pkg_input_tensors[input_io_desc].get();
+ }
+
+ // Set buffer of IOTensor
+ auto input_desc = desc.inputs[input_pkg_index].get();
+ // TODO Remove const_cast (we need const_cast as ITensor is writable)
+ _pkg_input_tensors[input_io_desc]->setUserTensor(
+ reinterpret_cast<uint8_t *>(const_cast<void *>(input_desc->buffer)), input_desc->size);
+ }
+ else
+ {
+ auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto &from_model_index = std::get<ir::ModelIndex>(from_iodesc);
+ const auto &from_subg_index = std::get<ir::SubgraphIndex>(from_iodesc);
+ const auto &from_ioindex = std::get<ir::IOIndex>(from_iodesc).value();
+
+ // Only sequential execution of models is supported
+ assert(from_model_index.value() < model_index.value());
+ assert(from_subg_index.value() == 0);
+ const auto from_executor = _executors.at({from_model_index, from_subg_index}).get();
+ const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (_edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end())
+ {
+ inputs_inter[i] = from_executor->getOutputTensors().at(from_ioindex);
+ }
+ else
+ {
+ inputs_inter[i] = _edge_quant_tensors.at(to_iodesc).get();
+ }
+ assert(inputs_inter[i]->buffer() != nullptr);
+ }
+ }
+
+ // Set outputs of executor
for (uint32_t i = 0; i < output_size; i++)
- desc2.outputs[i] = std::make_unique<OutputDesc>(*desc.outputs[i].get());
+ {
+ const auto output_pkg_index = find_output_index(_model_edges->pkg_outputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+ const auto output_io_desc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (output_pkg_index != -1)
+ {
+ // Allocate type-aware quantization tensors for nnpkg outputs and set internal tensors
+ if (_pkg_output_quant_tensors.find(output_io_desc) != _pkg_output_quant_tensors.end())
+ {
+ _pkg_output_quant_tensors[output_io_desc]->allocate_buffer();
+
+ outputs_inter[i] = _pkg_output_quant_tensors[output_io_desc].get();
+ }
+ else
+ {
+ outputs_inter[i] = _pkg_output_tensors[output_io_desc].get();
+ }
+
+ // Set buffer of IOTensor
+ auto output_desc = desc.outputs[output_pkg_index].get();
+ _pkg_output_tensors[output_io_desc]->setUserTensor(
+ reinterpret_cast<uint8_t *>(output_desc->buffer), output_desc->size);
+ }
+ else
+ {
+ // Allocate buffer of `from` tensors
+ const auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ _edge_tensors[from_iodesc]->allocate_buffer();
+ outputs_inter[i] = _edge_tensors[from_iodesc].get();
- executor2->execute(desc2);
+ // Allocate buffer of tensors for type-aware quantization
+ for (const auto &to_iodesc : _edge_map[from_iodesc])
+ {
+ _edge_tensors[from_iodesc]->increase_ref();
+ if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+ {
+ auto type_aware_quant_tensor = _edge_quant_tensors.at(to_iodesc).get();
+ type_aware_quant_tensor->allocate_buffer();
+
+ _edge_tensors[from_iodesc]->decrease_ref();
+ }
+ }
+ }
+ }
+
+ _pkg_input_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+
+ executor->execute(inputs_inter, outputs_inter);
+
+ _edge_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+ _pkg_output_quant_layers[{model_index, ir::SubgraphIndex{0}}]->run();
+
+ // Release input buffers that are no longer needed
+ for (uint32_t i = 0; i < input_size; i++)
+ {
+ const auto input_pkg_index = find_input_index(_model_edges->pkg_inputs, model_index,
+ ir::SubgraphIndex{0}, ir::IOIndex{i});
+
+ const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (input_pkg_index == -1)
+ {
+ if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+ {
+ // Decrease reference count of tensor for type-aware quantization if input tensor is the
+ // tensor
+ const auto to_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+ if (_edge_quant_tensors.find(to_iodesc) != _edge_quant_tensors.end())
+ {
+ _edge_quant_tensors[to_iodesc]->decrease_ref();
+ }
+ }
+ else
+ {
+ // Decrease reference count of `from` tensor if input tensor is the `from` tensor
+ const auto from_iodesc = find_from(model_index, ir::SubgraphIndex{0}, ir::IOIndex{i});
+ _edge_tensors[from_iodesc]->decrease_ref();
+
+ // Decrease reference count of nnpkg inputs
+ if (_pkg_input_quant_tensors.find(to_iodesc) != _pkg_input_quant_tensors.end())
+ {
+ _pkg_input_quant_tensors[to_iodesc]->decrease_ref();
+ }
+ }
+ }
+ }
+
+ // Release output buffers if those buffers are no longer used other executors because of
+ // type-aware quantization
+ // FIXME if tensors for type-aware quantization unified for the same `from` tensor and same type
+ for (uint32_t i = 0; i < output_size; i++)
+ {
+ auto from_iodesc = ir::IODesc{model_index, ir::SubgraphIndex{0}, ir::IOIndex{i}};
+
+ // Check if other executors will use the buffer of edge tensor
+ const auto &to_list = _edge_map[from_iodesc];
+ if (to_list.size() == 0)
+ {
+ // This condition means `from_iodesc` tensor is an output of nnpkg
+ continue;
+ }
+
+ bool to_be_release =
+ !std::any_of(to_list.begin(), to_list.end(), [&](const ir::IODesc &to_iodesc) {
+ // This condition means another executor uses the buffer of edge tensor
+ return _edge_quant_tensors.find(to_iodesc) == _edge_quant_tensors.end();
+ });
+
+ if (to_be_release)
+ {
+ // This edge tensor's buffer won't be used in other executors
+ // Tensors for type-aware quantization take over the role of this edge tensor instead
+ _edge_tensors[from_iodesc]->decrease_ref();
+ }
+
+ // Decrease reference count of nnpkg outputs
+ if (_pkg_output_quant_tensors.find(from_iodesc) != _pkg_output_quant_tensors.end())
+ {
+ _pkg_output_quant_tensors[from_iodesc]->decrease_ref();
+ }
+ }
}
}
+// modelCount() probes _executors for consecutive model indices starting from 0.
+// It assumes that Compiler generates an Executor for every model and that _executors includes
+// all generated Executors.
+// If the nnpackage includes model(s) which have no connection and Compiler does not
+// generate Executors for them, modelCount() returns a smaller value than the real model count.
+uint16_t Executors::modelCount() const
+{
+  // Count upwards from model 0 and stop at the first model index that has no executor
+  // registered for subgraph 0
+  uint16_t count = 0;
+  while (_executors.find(std::make_pair(ir::ModelIndex{count}, ir::SubgraphIndex{0})) !=
+         _executors.end())
+  {
+    count++;
+  }
+
+  return count;
+}
+
} // namespace exec
} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_EXECUTORS_H__
+#define __ONERT_EXEC_EXECUTORS_H__
+
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+#include "IPermuteFunction.h"
+
+namespace std
+{
+
+/**
+ * @brief Hash for (ModelIndex, SubgraphIndex) pairs so that they can be used as keys of
+ *        unordered containers
+ */
+template <> struct hash<std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex>>
+{
+  size_t
+  operator()(const std::pair<::onert::ir::ModelIndex, ::onert::ir::SubgraphIndex> &key) const
+    noexcept
+  {
+    // Shift the model hash so that it does not simply cancel out against the subgraph hash
+    const size_t model_hash = hash<uint32_t>()(key.first.value());
+    const size_t subg_hash = hash<uint32_t>()(key.second.value());
+    return (model_hash << 16) ^ subg_hash;
+  }
+};
+
+} // namespace std
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executors
+ */
+class Executors : public IExecutors
+{
+public:
+ Executors(void) = delete;
+ // Takes ownership of `model_edges` and builds `_edge_map`: for every edge, `edge.to` is appended
+ // to the list stored under `edge.from`. All other containers start empty.
+ Executors(std::unique_ptr<ir::ModelEdges> model_edges)
+ : _executors{}, _model_edges{std::move(model_edges)}, _edge_quant_layers{},
+ _edge_quant_tensors{}, _edge_tensors{}, _is_created_edge_quant_layers{false},
+ _pkg_input_quant_layers{}, _pkg_output_quant_layers{}, _pkg_input_quant_tensors{},
+ _pkg_output_quant_tensors{}, _pkg_input_tensors{}, _pkg_output_tensors{}
+ {
+ for (const auto &edge : _model_edges->edges)
+ {
+ _edge_map[edge.from].emplace_back(edge.to);
+ }
+ }
+ // Not copyable, but movable
+ Executors(const Executors &) = delete;
+ Executors(Executors &&) = default;
+ ~Executors() = default;
+
+ // TODO Use Executor index
+ void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) override;
+
+ IExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const override;
+
+ uint32_t inputSize() const override;
+
+ uint32_t outputSize() const override;
+
+ const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+ const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+ void execute(const IODescription &desc) override;
+
+private:
+ // Helpers called at the beginning of execute(); their definitions are in the .cc file
+ void checkSupportedMultimodel() const;
+ void createEdgeQuantLayers();
+ void CreatePkgIOTensors(const IODescription &desc);
+ void createPkgIOQuantLayers(const IODescription &desc);
+ // Number of consecutive model indices, starting from 0, that have an executor for subgraph 0
+ uint16_t modelCount() const;
+
+private:
+ // TODO Remove this class
+ // Minimal IPermuteFunction: `inputs` become the source tensors and `outputs` the destination
+ // tensors (both lists must have the same size); optimize() does nothing
+ class PermuteLayer : public exec::IPermuteFunction
+ {
+ public:
+ PermuteLayer(const std::vector<backend::ITensor *> &inputs,
+ const std::vector<backend::ITensor *> &outputs)
+ {
+ assert(inputs.size() == outputs.size());
+ _src_tensors = inputs;
+ _dst_tensors = outputs;
+ }
+ virtual ~PermuteLayer() {}
+ void optimize() override {}
+ };
+
+ // Defined outside of this header (only forward-declared here)
+ class EdgeTensor;
+
+private:
+ // Executors keyed by (model index, subgraph index)
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<IExecutor>>
+ _executors;
+
+ // NOTE _model_edges may use different struct type for executor implementation
+ std::unique_ptr<ir::ModelEdges> _model_edges;
+ // Key: `from` IODesc, Value: every `to` IODesc connected to it (filled in the constructor)
+ std::unordered_map<ir::IODesc, std::vector<ir::IODesc>> _edge_map;
+
+ /**
+ * @brief Type-aware quantization layers for edges between executors
+ *
+ */
+ // TODO Move variables related to type-aware quantization for edges into compilation stage
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _edge_quant_layers;
+
+ /**
+ * @brief Tensors for type-aware quantization of edges
+ * Key: `to` IODesc, Value: EdgeTensor
+ */
+ //
+ // Q: Why is Key `to` IODesc
+ // A: these tensors are currently created depending on the type of `to`
+ // TODO Unify tensors with the same `from` tensor and same type
+ // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr.
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_quant_tensors;
+
+ /**
+ * @brief Tensors for edges between executors that are not related to type-aware quantization
+ * Key: `from` IODesc, Value: EdgeTensor
+ */
+ // Q: Why is Key `from` IODesc
+ // A: `from` can be connected to multiple `to`
+ // NOTE The incomplete type 'EdgeTensor' cannot be declared as unique_ptr.
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _edge_tensors;
+ /**
+ * @brief Whether type-aware quantization layers for edges between executors are created
+ *
+ */
+ // TODO Remove this member after the creation of type-aware quantization layers for edges
+ // is moved into compilation stage
+ bool _is_created_edge_quant_layers;
+
+ // Type-aware quantization layers for nnpkg inputs, keyed by (model index, subgraph index)
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _pkg_input_quant_layers;
+ // Type-aware quantization layers for nnpkg outputs, keyed by (model index, subgraph index)
+ // TODO Replace PermuteLayer with backend::builtin::kernel::PermuteLayer
+ std::unordered_map<std::pair<ir::ModelIndex, ir::SubgraphIndex>, std::unique_ptr<PermuteLayer>>
+ _pkg_output_quant_layers;
+ // Edge tensors of nnpkg inputs/outputs for type-aware quantization
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_input_quant_tensors;
+ std::unordered_map<ir::IODesc, std::shared_ptr<EdgeTensor>> _pkg_output_quant_tensors;
+ // IOTensors for user buffer
+ std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_input_tensors;
+ std::unordered_map<ir::IODesc, std::unique_ptr<backend::builtin::IOTensor>> _pkg_output_tensors;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTORS_H__
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IPermuteFunction.h"
+
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Dequantize.h>
+#include "backend/IPortableTensor.h"
+#include "exec/IFunction.h"
+#include "ir/Index.h"
+#include "ir/Shape.h"
+#include <memory>
+#include <misc/polymorphic_downcast.h>
+#include <typeinfo>
+#include "util/Utils.h"
+#include <vector>
+#include <unordered_map>
+
+namespace
+{
+using namespace onert;
+
+// Build a cker shape holding the same dimensions as `tensor` (the tensor must be NHWC)
+inline nnfw::cker::Shape getShape(const backend::ITensor *tensor)
+{
+  const ir::Shape src_shape = tensor->getShape();
+
+  assert(tensor->layout() == ir::Layout::NHWC);
+
+  const auto num_dims = src_shape.rank();
+  nnfw::cker::Shape cker_shape(num_dims);
+  auto dims = cker_shape.DimsData();
+  int axis = 0;
+  while (axis < num_dims)
+  {
+    dims[axis] = src_shape.dim(axis);
+    ++axis;
+  }
+  return cker_shape;
+}
+
+// Quantize per element
+//
+// Every source element x is converted to clamp(round(x / scale) + zero_point), where scale and
+// zero_point are taken from dst_tensor and the clamp range is the value range of OutputT.
+// When the tensors are rank 4 and their layouts differ, the destination coordinates are obtained
+// by converting the source coordinates between the two layouts.
+template <typename InputT, typename OutputT>
+void elementwiseQuantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+  const auto scale = dst_tensor->data_scale();
+  const auto zero_point = dst_tensor->data_zero_point();
+
+  const int32_t min_val = std::numeric_limits<OutputT>::min();
+  const int32_t max_val = std::numeric_limits<OutputT>::max();
+
+  auto loop_shape = src_tensor->getShape();
+  const auto src_layout = src_tensor->layout();
+  const auto dst_layout = dst_tensor->layout();
+  const bool is_permutation = src_layout != dst_layout && loop_shape.rank() == 4;
+  ShapeLoop(loop_shape, [&](const onert::ir::Coordinates &coords) {
+    const InputT *input_data =
+      reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
+
+    // Saturate while the value is still floating point: converting a floating-point value that
+    // does not fit into the integer type (or NaN) is undefined behavior
+    const double unclamped = static_cast<double>(round(*input_data / scale)) + zero_point;
+    int32_t clamped;
+    if (!(unclamped > min_val))
+    {
+      // Values at or below the lower bound; NaN also ends up here
+      clamped = min_val;
+    }
+    else if (unclamped > max_val)
+    {
+      clamped = max_val;
+    }
+    else
+    {
+      clamped = static_cast<int32_t>(unclamped);
+    }
+
+    ir::Coordinates dst_coords =
+      is_permutation ? ir::convertCoordinates(coords, src_layout, dst_layout) : coords;
+    OutputT *output_data =
+      reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
+    *output_data = static_cast<OutputT>(clamped);
+  });
+}
+
+// TODO Optimize the case where tensors has the same layout
+template <typename InputT, typename OutputT>
+void quantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+  // The whole-buffer kernel is used only when neither tensor has padding, both tensors share
+  // the same layout and the source is not dynamic
+  const bool whole_buffer = !src_tensor->has_padding() && !dst_tensor->has_padding() &&
+                            src_tensor->layout() == dst_tensor->layout() &&
+                            !src_tensor->is_dynamic();
+  if (!whole_buffer)
+  {
+    elementwiseQuantize<InputT, OutputT>(src_tensor, dst_tensor);
+    return;
+  }
+
+  assert(!dst_tensor->is_dynamic());
+
+  // Call optimized neon kernel
+  nnfw::cker::Quantize(getShape(src_tensor),
+                       reinterpret_cast<const InputT *>(src_tensor->buffer()),
+                       getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()),
+                       dst_tensor->data_scale(), dst_tensor->data_zero_point());
+}
+
+// Dequantize per element
+//
+// Every source element q is converted to scale * (q - zero_point), where scale and zero_point
+// are taken from src_tensor. When the tensors are rank 4 and their layouts differ, the
+// destination coordinates are obtained by converting the source coordinates between the layouts.
+template <typename InputT, typename OutputT>
+void elementwiseDequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+  const auto scale = src_tensor->data_scale();
+  const auto zero_point = src_tensor->data_zero_point();
+
+  auto loop_shape = src_tensor->getShape();
+  const auto from_layout = src_tensor->layout();
+  const auto to_layout = dst_tensor->layout();
+  const bool needs_permutation = from_layout != to_layout && loop_shape.rank() == 4;
+
+  auto dequantize_at = [&](const onert::ir::Coordinates &coords) {
+    const auto *src_elem =
+      reinterpret_cast<const InputT *>(src_tensor->buffer() + src_tensor->calcOffset(coords));
+    const OutputT real_value = static_cast<OutputT>(scale * (*src_elem - zero_point));
+
+    ir::Coordinates dst_coords =
+      needs_permutation ? ir::convertCoordinates(coords, from_layout, to_layout) : coords;
+    auto *dst_elem =
+      reinterpret_cast<OutputT *>(dst_tensor->buffer() + dst_tensor->calcOffset(dst_coords));
+    *dst_elem = real_value;
+  };
+  ShapeLoop(loop_shape, dequantize_at);
+}
+
+// TODO Optimize the case where tensors has the same layout
+template <typename InputT, typename OutputT>
+void dequantize(const backend::ITensor *src_tensor, backend::ITensor *dst_tensor)
+{
+  // The whole-buffer kernel is used only when neither tensor has padding, both tensors share
+  // the same layout and the source is not dynamic
+  const bool whole_buffer = !src_tensor->has_padding() && !dst_tensor->has_padding() &&
+                            src_tensor->layout() == dst_tensor->layout() &&
+                            !src_tensor->is_dynamic();
+  if (!whole_buffer)
+  {
+    elementwiseDequantize<InputT, OutputT>(src_tensor, dst_tensor);
+    return;
+  }
+
+  assert(!dst_tensor->is_dynamic());
+
+  // Call optimized neon kernel
+  nnfw::cker::Dequantize(getShape(src_tensor),
+                         reinterpret_cast<const InputT *>(src_tensor->buffer()),
+                         getShape(dst_tensor), reinterpret_cast<OutputT *>(dst_tensor->buffer()),
+                         src_tensor->data_scale(), src_tensor->data_zero_point());
+}
+
+// Convert between FLOAT32 and a quantized type
+//
+// A FLOAT32 source is quantized to the destination type; otherwise a FLOAT32 destination gets
+// the dequantized source. Any other combination throws std::runtime_error.
+template <typename SRC_T, typename DST_T,
+          std::enable_if_t<std::is_base_of<backend::ITensor, SRC_T>::value &&
+                             std::is_base_of<backend::ITensor, DST_T>::value,
+                           bool> = true>
+void typeAwareQuantize(const SRC_T *src_tensor, DST_T *dst_tensor)
+{
+  // TODO Support other types
+  const auto src_type = src_tensor->data_type();
+  const auto dst_type = dst_tensor->data_type();
+
+  if (src_type == ir::DataType::FLOAT32)
+  {
+    // float -> quantized
+    switch (dst_type)
+    {
+      case ir::DataType::QUANT_UINT8_ASYMM:
+        quantize<float, uint8_t>(src_tensor, dst_tensor);
+        return;
+      case ir::DataType::QUANT_INT8_SYMM:
+        quantize<float, int8_t>(src_tensor, dst_tensor);
+        return;
+      case ir::DataType::QUANT_INT16_SYMM:
+        quantize<float, int16_t>(src_tensor, dst_tensor);
+        return;
+      default:
+        throw std::runtime_error("IPermuteFunction: Unsupported quantization type");
+    }
+  }
+
+  if (dst_type == ir::DataType::FLOAT32)
+  {
+    // quantized -> float
+    switch (src_type)
+    {
+      case ir::DataType::QUANT_UINT8_ASYMM:
+        dequantize<uint8_t, float>(src_tensor, dst_tensor);
+        return;
+      case ir::DataType::QUANT_INT8_SYMM:
+        dequantize<int8_t, float>(src_tensor, dst_tensor);
+        return;
+      case ir::DataType::QUANT_INT16_SYMM:
+        dequantize<int16_t, float>(src_tensor, dst_tensor);
+        return;
+      default:
+        throw std::runtime_error("IPermuteFunction: Unsupported dequantization type");
+    }
+  }
+
+  throw std::runtime_error("IPermuteFunction: Unsupported type for type-aware quantization yet");
+}
+
+} // namespace
+
+namespace onert
+{
+namespace exec
+{
+
+// Copy every source tensor into the destination tensor stored at the same position.
+//
+// The per-tensor offset caches are sized on the first call. Pairs whose source and destination
+// are the same tensor object are skipped.
+void IPermuteFunction::run()
+{
+  // TODO Optimization : Make control does not reach here? when (_src_tensors.size() == 0)
+  assert(_src_tensors.size() == _dst_tensors.size());
+  if (_src_tensors_offsets.size() == 0)
+  {
+    // First run: create one (initially empty) offset list per tensor
+    _src_tensors_offsets.resize(_src_tensors.size());
+    _dst_tensors_offsets.resize(_dst_tensors.size());
+  }
+  assert(_src_tensors.size() == _src_tensors_offsets.size());
+  assert(_src_tensors_offsets.size() == _dst_tensors_offsets.size());
+
+  for (size_t i = 0; i < _src_tensors.size(); ++i)
+  {
+    auto src_tensor = _src_tensors.at(i);
+    auto dst_tensor = _dst_tensors.at(i);
+    auto &src_offsets = _src_tensors_offsets.at(i);
+    auto &dst_offsets = _dst_tensors_offsets.at(i);
+    if (src_tensor != dst_tensor)
+    {
+      const auto rank = src_tensor->getShape().rank();
+      permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+    }
+  }
+}
+
+// Copy `src_tensor` into `dst_tensor`.
+//
+// Empty sources are ignored. When the underlying C++ types of the two data types differ, the
+// copy is delegated to type-aware quantization; otherwise the typed permute<T>() overload that
+// matches the source data type is used. Unsupported data types throw std::runtime_error.
+void IPermuteFunction::permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor,
+                               size_t rank, std::vector<size_t> &src_offsets,
+                               std::vector<size_t> &dst_offsets)
+{
+  if (src_tensor->total_size() == 0)
+  {
+    assert(dst_tensor->total_size() == 0);
+    return;
+  }
+
+  assert(src_tensor != dst_tensor);
+
+  const bool same_underlying_type =
+    underlying_type(src_tensor->data_type()) == underlying_type(dst_tensor->data_type());
+  if (!same_underlying_type)
+  {
+    typeAwareQuantize(src_tensor, dst_tensor);
+    return;
+  }
+
+  switch (src_tensor->data_type())
+  {
+    case ir::DataType::FLOAT32:
+      permute<float>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      return;
+    case ir::DataType::INT32:
+      permute<int32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      return;
+    case ir::DataType::UINT32:
+      permute<uint32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      return;
+    case ir::DataType::BOOL8:
+    case ir::DataType::QUANT_UINT8_ASYMM:
+    case ir::DataType::UINT8:
+      permute<uint8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      return;
+    case ir::DataType::QUANT_INT8_ASYMM:
+    case ir::DataType::QUANT_INT8_SYMM:
+      permute<int8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      return;
+    case ir::DataType::INT64:
+      permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      return;
+    case ir::DataType::QUANT_INT16_SYMM:
+      permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+      return;
+    default:
+      throw std::runtime_error("IPermuteFunction: Not supported data type");
+  }
+}
+
+// Map an IR data type to the type_info of the C++ type used to store its elements.
+// Throws std::runtime_error for data types that are not listed.
+const std::type_info &IPermuteFunction::underlying_type(ir::DataType type) const
+{
+  switch (type)
+  {
+    // 8-bit storage
+    case ir::DataType::BOOL8:
+    case ir::DataType::QUANT_UINT8_ASYMM:
+    case ir::DataType::UINT8:
+      return typeid(uint8_t);
+    case ir::DataType::QUANT_INT8_ASYMM:
+    case ir::DataType::QUANT_INT8_SYMM:
+      return typeid(int8_t);
+    // 16-bit storage
+    case ir::DataType::QUANT_INT16_SYMM:
+      return typeid(int16_t);
+    // 32-bit storage
+    case ir::DataType::FLOAT32:
+      return typeid(float);
+    case ir::DataType::INT32:
+      return typeid(int32_t);
+    case ir::DataType::UINT32:
+      return typeid(uint32_t);
+    // 64-bit storage
+    case ir::DataType::INT64:
+      return typeid(int64_t);
+    default:
+      throw std::runtime_error("IPermuteFunction: Not supported data type");
+  }
+}
+
+} // namespace exec
+} // namespace onert
#include "backend/ITensor.h"
#include "exec/IFunction.h"
-#include "ir/Index.h"
-#include "ir/Shape.h"
#include <memory>
-#include <typeinfo>
-#include "util/Utils.h"
#include <vector>
#include <unordered_map>
};
public:
- virtual void run() override
- {
- // TODO Optimization : Make control does not reach here? when (_src_tensors.size() == 0)
- assert(_src_tensors.size() == _dst_tensors.size());
- if (_src_tensors_offsets.size() == 0)
- {
- _src_tensors_offsets.resize(_src_tensors.size());
- _dst_tensors_offsets.resize(_dst_tensors.size());
- }
- assert(_src_tensors.size() == _src_tensors_offsets.size());
- assert(_src_tensors_offsets.size() == _dst_tensors_offsets.size());
-
- for (size_t i = 0; i < _src_tensors.size(); ++i)
- {
- auto src_tensor = _src_tensors.at(i);
- auto dst_tensor = _dst_tensors.at(i);
- auto &src_offsets = _src_tensors_offsets.at(i);
- auto &dst_offsets = _dst_tensors_offsets.at(i);
- if (src_tensor != dst_tensor)
- {
- const auto rank = src_tensor->getShape().rank();
- permute(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- }
- }
- }
+ virtual void run() override;
virtual void prepare() override { optimize(); }
protected:
void permute(backend::ITensor *src_tensor, backend::ITensor *dst_tensor, size_t rank,
- std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets)
- {
- if (src_tensor->total_size() == 0)
- {
- assert(dst_tensor->total_size() == 0);
- return;
- }
-
- assert(src_tensor != dst_tensor);
- if (underlying_type(src_tensor->data_type()) != underlying_type(dst_tensor->data_type()))
- throw std::runtime_error("data type does not match");
- switch (src_tensor->data_type())
- {
- case ir::DataType::FLOAT32:
- permute<float>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::INT32:
- permute<int32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::UINT32:
- permute<uint32_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::BOOL8:
- case ir::DataType::QUANT_UINT8_ASYMM:
- case ir::DataType::UINT8:
- permute<uint8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::QUANT_INT8_ASYMM:
- case ir::DataType::QUANT_INT8_SYMM:
- permute<int8_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::INT64:
- permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- case ir::DataType::QUANT_INT16_SYMM:
- permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
- break;
- default:
- throw std::runtime_error("IPermuteFunction: Not supported data type");
- break;
- }
- }
+ std::vector<size_t> &src_offsets, std::vector<size_t> &dst_offsets);
private:
// TODO make src const by proving const access()
// NOTE The typeid expression is lvalue expression which refers to an object with static storage
// duration, of the polymorphic type const std::type_info or of some type derived from it.
// So std::type_info is non-copyable
- const std::type_info &underlying_type(ir::DataType type) const
- {
- switch (type)
- {
- case ir::DataType::FLOAT32:
- return typeid(float);
- case ir::DataType::INT32:
- return typeid(int32_t);
- case ir::DataType::UINT32:
- return typeid(uint32_t);
- case ir::DataType::INT64:
- return typeid(int64_t);
- case ir::DataType::BOOL8:
- case ir::DataType::QUANT_UINT8_ASYMM:
- case ir::DataType::UINT8:
- return typeid(uint8_t);
- case ir::DataType::QUANT_INT8_ASYMM:
- case ir::DataType::QUANT_INT8_SYMM:
- return typeid(int8_t);
- case ir::DataType::QUANT_INT16_SYMM:
- return typeid(int16_t);
- default:
- throw std::runtime_error("IPermuteFunction: Not supported data type");
- }
- }
+ const std::type_info &underlying_type(ir::DataType type) const;
protected:
std::vector<backend::ITensor *> _src_tensors;
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IPermuteFunction.h"
+
+#include <ir/Layout.h>
+#include <ir/Shape.h>
+#include <ir/TypeInfo.h>
+
+#include <cmath>
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace onert;
+using namespace ir;
+using namespace backend;
+using namespace exec;
+
+/**
+ * @brief Minimal ITensor for tests: a user-provided buffer addressed through strides
+ *
+ * `pad` extra elements are added after the innermost dimension when the strides are computed,
+ * so a non-zero pad only changes the layout of tensors whose rank is greater than 1.
+ */
+class MockUpTensor : public ITensor
+{
+public:
+  MockUpTensor(const Shape &shape, const TypeInfo &type_info, Layout layout, size_t pad)
+    : _shape(shape), _type_info(type_info), _layout(layout), _data(nullptr)
+  {
+    // The pad is attached to the innermost dimension, so at least one dimension is required
+    assert(shape.rank() > 0);
+
+    _strides.resize(shape.rank());
+
+    std::vector<size_t> pads(shape.rank(), 0);
+    pads[shape.rank() - 1] = pad;
+    // Strides are counted in elements, from the innermost dimension outwards
+    size_t stride = 1;
+    for (int32_t i = _shape.rank() - 1; i >= 0; --i)
+    {
+      _strides.at(i) = stride;
+      stride = stride * (_shape.dim(i) + pads.at(i));
+    }
+  }
+  virtual ~MockUpTensor() {}
+
+  // The buffer is owned by the caller and must stay alive while the tensor is used
+  void setBuffer(uint8_t *data) { _data = data; }
+
+  // Size in bytes, including the padding
+  size_t total_size() const override
+  {
+    size_t total_size = _strides[0] * _shape.dim(0);
+    total_size *= sizeOfDataType(data_type());
+    return total_size;
+  }
+
+  // Byte offset of the element at `coords`
+  size_t calcOffset(const ir::Coordinates &coords) const override
+  {
+    const auto rank = static_cast<size_t>(_shape.rank());
+    size_t offset = 0;
+    for (size_t i = 0; i < rank; ++i)
+    {
+      offset += (_strides[i] * coords[i]);
+    }
+    offset *= sizeOfDataType(data_type());
+    return offset;
+  }
+
+  uint8_t *buffer() const override { return _data; }
+
+  ir::Layout layout() const override { return _layout; }
+  ir::DataType data_type() const override { return _type_info.type(); }
+  float data_scale() const override { return _type_info.scale(); }
+  int32_t data_zero_point() const override { return _type_info.zero_point(); }
+  const std::vector<float> &data_scales() const override { return _type_info.scales(); }
+  const std::vector<int32_t> &data_zero_points() const override { return _type_info.zero_points(); }
+  // True when the strided extent holds more elements than the shape itself
+  bool has_padding() const override
+  {
+    return total_size() / sizeOfDataType(data_type()) != _shape.num_elements();
+  }
+  void access(const std::function<void(ITensor &tensor)> &fn) final { fn(*this); }
+
+  bool is_dynamic() const override { return false; }
+  Shape getShape() const override { return _shape; }
+
+private:
+  Shape _shape;
+  TypeInfo _type_info;
+  Layout _layout;
+  uint8_t *_data;
+  std::vector<size_t> _strides;
+};
+
+/**
+ * @brief IPermuteFunction for tests: copies inputs[i] into outputs[i], optimize() does nothing
+ */
+class MockUpLayer : public IPermuteFunction
+{
+public:
+  MockUpLayer(const std::vector<ITensor *> &inputs, const std::vector<ITensor *> &outputs)
+  {
+    // Sources and destinations are paired by position
+    assert(inputs.size() == outputs.size());
+    _src_tensors = inputs;
+    _dst_tensors = outputs;
+  }
+  virtual ~MockUpLayer() {}
+
+  void optimize() override
+  {
+    // Nothing to optimize
+  }
+};
+
+// Copy four rank-1 float tensors and check that every element arrives unchanged
+TEST(IPermuteFunction, float_rank1)
+{
+  constexpr size_t num_cases = 4;
+  const size_t input_pads[num_cases] = {0, 1, 0, 2};
+  const size_t output_pads[num_cases] = {0, 0, 2, 1};
+  const std::vector<Shape> shapes{{1}, {4}, {5}, {2}};
+  float expected_buffer[] = {1, 0, -1, -2, 3};
+  const auto type_info = TypeInfo(DataType::FLOAT32);
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(num_cases);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(num_cases);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(num_cases);
+
+  std::vector<ITensor *> src_tensors;
+  std::vector<ITensor *> dst_tensors;
+  for (size_t i = 0; i < num_cases; ++i)
+  {
+    // Every input reads from the shared expected buffer
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    // Every output gets its own zero-initialized buffer
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+
+    src_tensors.push_back(inputs[i].get());
+    dst_tensors.push_back(outputs[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(src_tensors, dst_tensors);
+  mockup_layer->run();
+
+  for (size_t i = 0; i < num_cases; ++i)
+  {
+    const auto &src = inputs[i];
+    const auto &dst = outputs[i];
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      Coordinates coords{j};
+      float result = *reinterpret_cast<float *>(dst->buffer() + dst->calcOffset(coords));
+      float expected = *reinterpret_cast<float *>(src->buffer() + src->calcOffset(coords));
+      EXPECT_EQ(result, expected);
+    }
+  }
+}
+
+// Copy four rank-2 float tensors with differing paddings and check every element
+TEST(IPermuteFunction, float_rank2)
+{
+  constexpr size_t num_cases = 4;
+  const size_t input_pads[num_cases] = {0, 1, 0, 2};
+  const size_t output_pads[num_cases] = {0, 0, 2, 1};
+  const std::vector<Shape> shapes{{1, 4}, {2, 2}, {1, 5}, {2, 3}};
+  float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8};
+  const auto type_info = TypeInfo(DataType::FLOAT32);
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(num_cases);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(num_cases);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(num_cases);
+
+  std::vector<ITensor *> src_tensors;
+  std::vector<ITensor *> dst_tensors;
+  for (size_t i = 0; i < num_cases; ++i)
+  {
+    // Every input reads from the shared expected buffer
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    // Every output gets its own zero-initialized buffer
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+
+    src_tensors.push_back(inputs[i].get());
+    dst_tensors.push_back(outputs[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(src_tensors, dst_tensors);
+  mockup_layer->run();
+
+  for (size_t i = 0; i < num_cases; ++i)
+  {
+    const auto &src = inputs[i];
+    const auto &dst = outputs[i];
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        Coordinates coords{j, k};
+        float result = *reinterpret_cast<float *>(dst->buffer() + dst->calcOffset(coords));
+        float expected = *reinterpret_cast<float *>(src->buffer() + src->calcOffset(coords));
+        EXPECT_EQ(result, expected);
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_rank3)
+{
+  // Four rank-3 FLOAT32 cases: each input/output pair shares a shape, while the pad value handed
+  // to each MockUpTensor differs per side. All inputs read from the same source array.
+  const std::vector<Shape> shapes{{1, 4, 1}, {1, 2, 1}, {2, 1, 5}, {1, 2, 3}};
+  const size_t input_pads[4] = {0, 5, 0, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10};
+  const auto type_info = TypeInfo(DataType::FLOAT32);
+
+  std::vector<std::unique_ptr<MockUpTensor>> srcs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> dsts(4);
+  std::vector<std::unique_ptr<uint8_t[]>> dst_mem(4);
+  for (size_t t = 0; t < 4; ++t)
+  {
+    srcs[t] = std::make_unique<MockUpTensor>(shapes[t], type_info, Layout::NHWC, input_pads[t]);
+    srcs[t]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    // Each output gets its own storage, sized by the tensor's total_size()
+    dsts[t] = std::make_unique<MockUpTensor>(shapes[t], type_info, Layout::NHWC, output_pads[t]);
+    dst_mem[t] = std::make_unique<uint8_t[]>(dsts[t]->total_size());
+    dsts[t]->setBuffer(dst_mem[t].get());
+  }
+
+  auto layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{srcs[0].get(), srcs[1].get(), srcs[2].get(), srcs[3].get()},
+    std::vector<ITensor *>{dsts[0].get(), dsts[1].get(), dsts[2].get(), dsts[3].get()});
+  layer->run();
+
+  // The element found at every logical coordinate must be equal on both sides
+  for (size_t t = 0; t < 4; ++t)
+  {
+    MockUpTensor *src = srcs[t].get();
+    MockUpTensor *dst = dsts[t].get();
+    for (int32_t d0 = 0; d0 < shapes[t].dim(0); ++d0)
+    {
+      for (int32_t d1 = 0; d1 < shapes[t].dim(1); ++d1)
+      {
+        for (int32_t d2 = 0; d2 < shapes[t].dim(2); ++d2)
+        {
+          Coordinates coords{d0, d1, d2};
+          uint8_t *dst_elem = dst->buffer() + dst->calcOffset(coords);
+          uint8_t *src_elem = src->buffer() + src->calcOffset(coords);
+          float result = *reinterpret_cast<float *>(dst_elem);
+          float expected = *reinterpret_cast<float *>(src_elem);
+          EXPECT_EQ(result, expected);
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_rank4)
+{
+  // Four rank-4 FLOAT32 cases, all NHWC on both sides: each input/output pair shares a shape,
+  // while the pad value handed to each MockUpTensor differs per side.
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10};
+  const auto type_info = TypeInfo(DataType::FLOAT32);
+
+  std::vector<std::unique_ptr<MockUpTensor>> srcs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> dsts(4);
+  std::vector<std::unique_ptr<uint8_t[]>> dst_mem(4);
+  for (size_t t = 0; t < 4; ++t)
+  {
+    srcs[t] = std::make_unique<MockUpTensor>(shapes[t], type_info, Layout::NHWC, input_pads[t]);
+    srcs[t]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    // Each output gets its own storage, sized by the tensor's total_size()
+    dsts[t] = std::make_unique<MockUpTensor>(shapes[t], type_info, Layout::NHWC, output_pads[t]);
+    dst_mem[t] = std::make_unique<uint8_t[]>(dsts[t]->total_size());
+    dsts[t]->setBuffer(dst_mem[t].get());
+  }
+
+  auto layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{srcs[0].get(), srcs[1].get(), srcs[2].get(), srcs[3].get()},
+    std::vector<ITensor *>{dsts[0].get(), dsts[1].get(), dsts[2].get(), dsts[3].get()});
+  layer->run();
+
+  // The element found at every logical coordinate must be equal on both sides
+  for (size_t t = 0; t < 4; ++t)
+  {
+    MockUpTensor *src = srcs[t].get();
+    MockUpTensor *dst = dsts[t].get();
+    for (int32_t n = 0; n < shapes[t].dim(0); ++n)
+    {
+      for (int32_t h = 0; h < shapes[t].dim(1); ++h)
+      {
+        for (int32_t w = 0; w < shapes[t].dim(2); ++w)
+        {
+          for (int32_t c = 0; c < shapes[t].dim(3); ++c)
+          {
+            Coordinates coords{n, h, w, c};
+            uint8_t *dst_elem = dst->buffer() + dst->calcOffset(coords);
+            uint8_t *src_elem = src->buffer() + src->calcOffset(coords);
+            float result = *reinterpret_cast<float *>(dst_elem);
+            float expected = *reinterpret_cast<float *>(src_elem);
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_rank4_layout)
+{
+  // FLOAT32 rank-4 cases where input and output use opposite layouts: even-indexed cases go
+  // NHWC -> NCHW, odd-indexed cases go NCHW -> NHWC. `shapes` is always written in NHWC order.
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  float expected_buffer[] = {1, 0, -1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16};
+  const auto type_info = TypeInfo(DataType::FLOAT32);
+
+  std::vector<std::unique_ptr<MockUpTensor>> srcs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> dsts(4);
+  std::vector<std::unique_ptr<uint8_t[]>> dst_mem(4);
+  for (size_t t = 0; t < 4; ++t)
+  {
+    // Same extents expressed in both dimension orders
+    const Shape nhwc_shape = shapes[t];
+    const Shape nchw_shape{shapes[t].dim(0), shapes[t].dim(3), shapes[t].dim(1), shapes[t].dim(2)};
+    const bool src_is_nchw = (t % 2 == 1);
+
+    const Layout src_layout = src_is_nchw ? Layout::NCHW : Layout::NHWC;
+    const Shape &src_shape = src_is_nchw ? nchw_shape : nhwc_shape;
+    srcs[t] = std::make_unique<MockUpTensor>(src_shape, type_info, src_layout, input_pads[t]);
+    srcs[t]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    // The output always takes the layout the input does not use
+    const Layout dst_layout = src_is_nchw ? Layout::NHWC : Layout::NCHW;
+    const Shape &dst_shape = src_is_nchw ? nhwc_shape : nchw_shape;
+    dsts[t] = std::make_unique<MockUpTensor>(dst_shape, type_info, dst_layout, output_pads[t]);
+    dst_mem[t] = std::make_unique<uint8_t[]>(dsts[t]->total_size());
+    dsts[t]->setBuffer(dst_mem[t].get());
+  }
+
+  auto layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{srcs[0].get(), srcs[1].get(), srcs[2].get(), srcs[3].get()},
+    std::vector<ITensor *>{dsts[0].get(), dsts[1].get(), dsts[2].get(), dsts[3].get()});
+  layer->run();
+
+  // Walk the NHWC index space and translate the coordinate into each tensor's own layout
+  for (size_t t = 0; t < 4; ++t)
+  {
+    MockUpTensor *src = srcs[t].get();
+    MockUpTensor *dst = dsts[t].get();
+    for (int32_t n = 0; n < shapes[t].dim(0); ++n)
+    {
+      for (int32_t h = 0; h < shapes[t].dim(1); ++h)
+      {
+        for (int32_t w = 0; w < shapes[t].dim(2); ++w)
+        {
+          for (int32_t c = 0; c < shapes[t].dim(3); ++c)
+          {
+            Coordinates input_coords =
+              (src->layout() == Layout::NHWC) ? Coordinates{n, h, w, c} : Coordinates{n, c, h, w};
+            Coordinates output_coords =
+              (dst->layout() == Layout::NHWC) ? Coordinates{n, h, w, c} : Coordinates{n, c, h, w};
+            uint8_t *dst_elem = dst->buffer() + dst->calcOffset(output_coords);
+            uint8_t *src_elem = src->buffer() + src->calcOffset(input_coords);
+            float result = *reinterpret_cast<float *>(dst_elem);
+            float expected = *reinterpret_cast<float *>(src_elem);
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_to_qasymm8)
+{
+  // FLOAT32 inputs -> QUANT_UINT8_ASYMM outputs (scale 10, zero point 128). Each stored output,
+  // dequantized again by this test, must equal the float found at the same input coordinate.
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 128;
+
+  std::vector<std::unique_ptr<MockUpTensor>> srcs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> dsts(4);
+  std::vector<std::unique_ptr<uint8_t[]>> dst_mem(4);
+  for (size_t t = 0; t < 4; ++t)
+  {
+    srcs[t] = std::make_unique<MockUpTensor>(shapes[t], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+                                             input_pads[t]);
+    srcs[t]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    // Each output gets its own storage, sized by the tensor's total_size()
+    TypeInfo quant_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+    dsts[t] = std::make_unique<MockUpTensor>(shapes[t], quant_info, Layout::NHWC, output_pads[t]);
+    dst_mem[t] = std::make_unique<uint8_t[]>(dsts[t]->total_size());
+    dsts[t]->setBuffer(dst_mem[t].get());
+  }
+
+  auto layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{srcs[0].get(), srcs[1].get(), srcs[2].get(), srcs[3].get()},
+    std::vector<ITensor *>{dsts[0].get(), dsts[1].get(), dsts[2].get(), dsts[3].get()});
+  layer->run();
+
+  for (size_t t = 0; t < 4; ++t)
+  {
+    MockUpTensor *src = srcs[t].get();
+    MockUpTensor *dst = dsts[t].get();
+    for (int32_t n = 0; n < shapes[t].dim(0); ++n)
+    {
+      for (int32_t h = 0; h < shapes[t].dim(1); ++h)
+      {
+        for (int32_t w = 0; w < shapes[t].dim(2); ++w)
+        {
+          for (int32_t c = 0; c < shapes[t].dim(3); ++c)
+          {
+            Coordinates coords{n, h, w, c};
+            uint8_t *dst_elem = dst->buffer() + dst->calcOffset(coords);
+            uint8_t *src_elem = src->buffer() + src->calcOffset(coords);
+            uint8_t qasymm8 = *dst_elem;
+            // Dequantize the stored value before comparing against the float source
+            float result = (qasymm8 - zero_point) * scale;
+            float expected = *reinterpret_cast<float *>(src_elem);
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_to_qsymm8)
+{
+  // FLOAT32 inputs -> QUANT_INT8_SYMM outputs (scale 10, zero point 0). Each stored output,
+  // dequantized again by this test, must equal the float found at the same input coordinate.
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 0;
+
+  std::vector<std::unique_ptr<MockUpTensor>> srcs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> dsts(4);
+  std::vector<std::unique_ptr<uint8_t[]>> dst_mem(4);
+  for (size_t t = 0; t < 4; ++t)
+  {
+    srcs[t] = std::make_unique<MockUpTensor>(shapes[t], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+                                             input_pads[t]);
+    srcs[t]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    // Each output gets its own storage, sized by the tensor's total_size()
+    TypeInfo quant_info{DataType::QUANT_INT8_SYMM, scale, zero_point};
+    dsts[t] = std::make_unique<MockUpTensor>(shapes[t], quant_info, Layout::NHWC, output_pads[t]);
+    dst_mem[t] = std::make_unique<uint8_t[]>(dsts[t]->total_size());
+    dsts[t]->setBuffer(dst_mem[t].get());
+  }
+
+  auto layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{srcs[0].get(), srcs[1].get(), srcs[2].get(), srcs[3].get()},
+    std::vector<ITensor *>{dsts[0].get(), dsts[1].get(), dsts[2].get(), dsts[3].get()});
+  layer->run();
+
+  for (size_t t = 0; t < 4; ++t)
+  {
+    MockUpTensor *src = srcs[t].get();
+    MockUpTensor *dst = dsts[t].get();
+    for (int32_t n = 0; n < shapes[t].dim(0); ++n)
+    {
+      for (int32_t h = 0; h < shapes[t].dim(1); ++h)
+      {
+        for (int32_t w = 0; w < shapes[t].dim(2); ++w)
+        {
+          for (int32_t c = 0; c < shapes[t].dim(3); ++c)
+          {
+            Coordinates coords{n, h, w, c};
+            uint8_t *dst_elem = dst->buffer() + dst->calcOffset(coords);
+            uint8_t *src_elem = src->buffer() + src->calcOffset(coords);
+            int8_t qsymm8 = *reinterpret_cast<int8_t *>(dst_elem);
+            // Dequantize the stored value before comparing against the float source
+            float result = (qsymm8 - zero_point) * scale;
+            float expected = *reinterpret_cast<float *>(src_elem);
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_to_qsymm16)
+{
+  // FLOAT32 inputs -> QUANT_INT16_SYMM outputs (scale 10, zero point 0). Each stored output,
+  // dequantized again by this test, must equal the float found at the same input coordinate.
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 0;
+
+  std::vector<std::unique_ptr<MockUpTensor>> srcs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> dsts(4);
+  std::vector<std::unique_ptr<uint8_t[]>> dst_mem(4);
+  for (size_t t = 0; t < 4; ++t)
+  {
+    srcs[t] = std::make_unique<MockUpTensor>(shapes[t], TypeInfo(DataType::FLOAT32), Layout::NHWC,
+                                             input_pads[t]);
+    srcs[t]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    // Each output gets its own storage, sized by the tensor's total_size()
+    TypeInfo quant_info{DataType::QUANT_INT16_SYMM, scale, zero_point};
+    dsts[t] = std::make_unique<MockUpTensor>(shapes[t], quant_info, Layout::NHWC, output_pads[t]);
+    dst_mem[t] = std::make_unique<uint8_t[]>(dsts[t]->total_size());
+    dsts[t]->setBuffer(dst_mem[t].get());
+  }
+
+  auto layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{srcs[0].get(), srcs[1].get(), srcs[2].get(), srcs[3].get()},
+    std::vector<ITensor *>{dsts[0].get(), dsts[1].get(), dsts[2].get(), dsts[3].get()});
+  layer->run();
+
+  for (size_t t = 0; t < 4; ++t)
+  {
+    MockUpTensor *src = srcs[t].get();
+    MockUpTensor *dst = dsts[t].get();
+    for (int32_t n = 0; n < shapes[t].dim(0); ++n)
+    {
+      for (int32_t h = 0; h < shapes[t].dim(1); ++h)
+      {
+        for (int32_t w = 0; w < shapes[t].dim(2); ++w)
+        {
+          for (int32_t c = 0; c < shapes[t].dim(3); ++c)
+          {
+            Coordinates coords{n, h, w, c};
+            uint8_t *dst_elem = dst->buffer() + dst->calcOffset(coords);
+            uint8_t *src_elem = src->buffer() + src->calcOffset(coords);
+            int16_t qsymm16 = *reinterpret_cast<int16_t *>(dst_elem);
+            // Dequantize the stored value before comparing against the float source
+            float result = (qsymm16 - zero_point) * scale;
+            float expected = *reinterpret_cast<float *>(src_elem);
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, qasymm8_to_float)
+{
+  // QUANT_UINT8_ASYMM inputs -> FLOAT32 outputs (scale 10, zero point 128). The quantized input
+  // data is derived from expected_buffer below; every output element must equal the dequantized
+  // value of the input element at the same coordinate.
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 128;
+  // Size the quantized buffer from the float source instead of repeating a magic number
+  constexpr size_t num_elements = sizeof(expected_buffer) / sizeof(expected_buffer[0]);
+  uint8_t input_buffer[num_elements];
+
+  const int32_t min_val = std::numeric_limits<uint8_t>::min();
+  const int32_t max_val = std::numeric_limits<uint8_t>::max();
+  // Unsigned index: avoids comparing a signed counter against the unsigned element count
+  for (size_t i = 0; i < num_elements; ++i)
+  {
+    int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+    // Clamped to the uint8_t range first, so the explicit narrowing cast is lossless
+    input_buffer[i] = static_cast<uint8_t>(std::min(std::max(unclamped, min_val), max_val));
+  }
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(input_buffer);
+
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+                                                Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates coords{j, k, l, m};
+            float result =
+              *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+            uint8_t qasymm8 =
+              *reinterpret_cast<uint8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+            // Reference value: dequantize the input element with the same scale/zero point
+            float expected = (qasymm8 - zero_point) * scale;
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, qsymm8_to_float)
+{
+  // QUANT_INT8_SYMM inputs -> FLOAT32 outputs (scale 10, zero point 0). The quantized input data
+  // is derived from expected_buffer below; every output element must equal the dequantized value
+  // of the input element at the same coordinate.
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 0;
+  constexpr size_t num_elements = sizeof(expected_buffer) / sizeof(expected_buffer[0]);
+  // Signed storage: the quantized values are negative for negative floats and are read back as
+  // int8_t, so the buffer element type matches the tensor's data type
+  int8_t input_buffer[num_elements];
+
+  const int32_t min_val = std::numeric_limits<int8_t>::min();
+  const int32_t max_val = std::numeric_limits<int8_t>::max();
+  // Unsigned index: avoids comparing a signed counter against the unsigned element count
+  for (size_t i = 0; i < num_elements; ++i)
+  {
+    int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+    // Clamped to the int8_t range first, so the explicit narrowing cast is lossless
+    input_buffer[i] = static_cast<int8_t>(std::min(std::max(unclamped, min_val), max_val));
+  }
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    TypeInfo type_info{DataType::QUANT_INT8_SYMM, scale, zero_point};
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+                                                Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates coords{j, k, l, m};
+            float result =
+              *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+            int8_t qsymm8 =
+              *reinterpret_cast<int8_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+            // Reference value: dequantize the input element with the same scale/zero point
+            float expected = (qsymm8 - zero_point) * scale;
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, qsymm16_to_float)
+{
+  // QUANT_INT16_SYMM inputs -> FLOAT32 outputs (scale 10, zero point 0). The quantized input data
+  // is derived from expected_buffer below; every output element must equal the dequantized value
+  // of the input element at the same coordinate.
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10, 0, -10, -20, 30, -40, 50, -60, 70, -80, 90, -100};
+  float scale = 10;
+  int32_t zero_point = 0;
+  constexpr size_t num_elements = sizeof(expected_buffer) / sizeof(expected_buffer[0]);
+  // Elements must be 16 bits wide: the tensors below are QUANT_INT16_SYMM and the buffer is
+  // read back through int16_t pointers. A byte-sized element type would both truncate the
+  // quantized values and make those reads run past the end of the array.
+  int16_t input_buffer[num_elements];
+
+  const int32_t min_val = std::numeric_limits<int16_t>::min();
+  const int32_t max_val = std::numeric_limits<int16_t>::max();
+  // Unsigned index: avoids comparing a signed counter against the unsigned element count
+  for (size_t i = 0; i < num_elements; ++i)
+  {
+    int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+    // Clamped to the int16_t range first, so the explicit narrowing cast is lossless
+    input_buffer[i] = static_cast<int16_t>(std::min(std::max(unclamped, min_val), max_val));
+  }
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    TypeInfo type_info{DataType::QUANT_INT16_SYMM, scale, zero_point};
+    inputs[i] = std::make_unique<MockUpTensor>(shapes[i], type_info, Layout::NHWC, input_pads[i]);
+    inputs[i]->setBuffer(reinterpret_cast<uint8_t *>(input_buffer));
+
+    outputs[i] = std::make_unique<MockUpTensor>(shapes[i], TypeInfo(DataType::FLOAT32),
+                                                Layout::NHWC, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates coords{j, k, l, m};
+            float result =
+              *reinterpret_cast<float *>(outputs[i]->buffer() + outputs[i]->calcOffset(coords));
+            int16_t qsymm16 =
+              *reinterpret_cast<int16_t *>(inputs[i]->buffer() + inputs[i]->calcOffset(coords));
+            // Reference value: dequantize the input element with the same scale/zero point
+            float expected = (qsymm16 - zero_point) * scale;
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, float_to_qasymm8_layout)
+{
+  // FLOAT32 inputs -> QUANT_UINT8_ASYMM outputs (scale 10, zero point 128) with opposite layouts:
+  // even-indexed cases go NHWC -> NCHW, odd-indexed cases go NCHW -> NHWC. `shapes` is always
+  // written in NHWC order.
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  float expected_buffer[] = {10,  0,  -10,  -20, 30,   -40, 50,   -60, 70,
+                             -80, 90, -100, 110, -120, 130, -140, 150, -160};
+  float scale = 10;
+  int32_t zero_point = 128;
+
+  std::vector<std::unique_ptr<MockUpTensor>> srcs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> dsts(4);
+  std::vector<std::unique_ptr<uint8_t[]>> dst_mem(4);
+  for (size_t t = 0; t < 4; ++t)
+  {
+    // Same extents expressed in both dimension orders
+    const Shape nhwc_shape = shapes[t];
+    const Shape nchw_shape{shapes[t].dim(0), shapes[t].dim(3), shapes[t].dim(1), shapes[t].dim(2)};
+    const bool src_is_nchw = (t % 2 == 1);
+
+    const Layout src_layout = src_is_nchw ? Layout::NCHW : Layout::NHWC;
+    const Shape &src_shape = src_is_nchw ? nchw_shape : nhwc_shape;
+    srcs[t] = std::make_unique<MockUpTensor>(src_shape, TypeInfo(DataType::FLOAT32), src_layout,
+                                             input_pads[t]);
+    srcs[t]->setBuffer(reinterpret_cast<uint8_t *>(expected_buffer));
+
+    // The output always takes the layout the input does not use
+    const Layout dst_layout = src_is_nchw ? Layout::NHWC : Layout::NCHW;
+    const Shape &dst_shape = src_is_nchw ? nhwc_shape : nchw_shape;
+    TypeInfo quant_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+    dsts[t] = std::make_unique<MockUpTensor>(dst_shape, quant_info, dst_layout, output_pads[t]);
+    dst_mem[t] = std::make_unique<uint8_t[]>(dsts[t]->total_size());
+    dsts[t]->setBuffer(dst_mem[t].get());
+  }
+
+  auto layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{srcs[0].get(), srcs[1].get(), srcs[2].get(), srcs[3].get()},
+    std::vector<ITensor *>{dsts[0].get(), dsts[1].get(), dsts[2].get(), dsts[3].get()});
+  layer->run();
+
+  // Walk the NHWC index space and translate the coordinate into each tensor's own layout
+  for (size_t t = 0; t < 4; ++t)
+  {
+    MockUpTensor *src = srcs[t].get();
+    MockUpTensor *dst = dsts[t].get();
+    for (int32_t n = 0; n < shapes[t].dim(0); ++n)
+    {
+      for (int32_t h = 0; h < shapes[t].dim(1); ++h)
+      {
+        for (int32_t w = 0; w < shapes[t].dim(2); ++w)
+        {
+          for (int32_t c = 0; c < shapes[t].dim(3); ++c)
+          {
+            Coordinates input_coords =
+              (src->layout() == Layout::NHWC) ? Coordinates{n, h, w, c} : Coordinates{n, c, h, w};
+            Coordinates output_coords =
+              (dst->layout() == Layout::NHWC) ? Coordinates{n, h, w, c} : Coordinates{n, c, h, w};
+            uint8_t *dst_elem = dst->buffer() + dst->calcOffset(output_coords);
+            uint8_t *src_elem = src->buffer() + src->calcOffset(input_coords);
+            uint8_t qasymm8 = *dst_elem;
+            // Dequantize the stored value before comparing against the float source
+            float result = (qasymm8 - zero_point) * scale;
+            float expected = *reinterpret_cast<float *>(src_elem);
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(IPermuteFunction, asymm8_to_float_layout)
+{
+  // QUANT_UINT8_ASYMM inputs -> FLOAT32 outputs (scale 10, zero point 128) with opposite layouts:
+  // even-indexed cases go NHWC -> NCHW, odd-indexed cases go NCHW -> NHWC. `shapes` is always
+  // written in NHWC order. The quantized input data is derived from expected_buffer below.
+  const size_t input_pads[4] = {0, 0, 1, 2};
+  const size_t output_pads[4] = {0, 3, 2, 1};
+  const std::vector<Shape> shapes{{1, 1, 4, 1}, {2, 1, 2, 3}, {1, 2, 1, 2}, {1, 1, 2, 3}};
+  float expected_buffer[] = {10,  0,  -10,  -20, 30,   -40, 50,   -60, 70,
+                             -80, 90, -100, 110, -120, 130, -140, 150, -160};
+  float scale = 10;
+  int32_t zero_point = 128;
+  constexpr size_t num_elements = sizeof(expected_buffer) / sizeof(expected_buffer[0]);
+  uint8_t input_buffer[num_elements];
+
+  // Clamp to the range of the type actually stored (uint8_t), not a wider integer type
+  const int32_t min_val = std::numeric_limits<uint8_t>::min();
+  const int32_t max_val = std::numeric_limits<uint8_t>::max();
+  // Unsigned index: avoids comparing a signed counter against the unsigned element count
+  for (size_t i = 0; i < num_elements; ++i)
+  {
+    int32_t unclamped = static_cast<int32_t>(std::round(expected_buffer[i] / scale)) + zero_point;
+    input_buffer[i] = static_cast<uint8_t>(std::min(std::max(unclamped, min_val), max_val));
+  }
+
+  std::vector<std::unique_ptr<MockUpTensor>> inputs(4);
+  std::vector<std::unique_ptr<MockUpTensor>> outputs(4);
+  std::vector<std::unique_ptr<uint8_t[]>> output_buffers(4);
+  for (size_t i = 0; i < 4; ++i)
+  {
+    Layout layout = Layout::NHWC;
+    Shape shape = shapes[i];
+    if (i % 2 == 1)
+    {
+      layout = Layout::NCHW;
+      shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+    }
+    TypeInfo type_info{DataType::QUANT_UINT8_ASYMM, scale, zero_point};
+    inputs[i] = std::make_unique<MockUpTensor>(shape, type_info, layout, input_pads[i]);
+    // Feed the quantized data prepared above; the raw bytes of the float array are not
+    // meaningful uint8 quantized values
+    inputs[i]->setBuffer(input_buffer);
+
+    // The output always takes the layout the input does not use
+    if (layout == Layout::NHWC)
+    {
+      layout = Layout::NCHW;
+      shape = Shape{shapes[i].dim(0), shapes[i].dim(3), shapes[i].dim(1), shapes[i].dim(2)};
+    }
+    else
+    {
+      layout = Layout::NHWC;
+      shape = shapes[i];
+    }
+    outputs[i] =
+      std::make_unique<MockUpTensor>(shape, TypeInfo(DataType::FLOAT32), layout, output_pads[i]);
+    output_buffers[i] = std::make_unique<uint8_t[]>(outputs[i]->total_size());
+    outputs[i]->setBuffer(output_buffers[i].get());
+  }
+
+  auto mockup_layer = std::make_unique<MockUpLayer>(
+    std::vector<ITensor *>{inputs[0].get(), inputs[1].get(), inputs[2].get(), inputs[3].get()},
+    std::vector<ITensor *>{outputs[0].get(), outputs[1].get(), outputs[2].get(), outputs[3].get()});
+  mockup_layer->run();
+
+  // Walk the NHWC index space and translate the coordinate into each tensor's own layout
+  for (size_t i = 0; i < 4; ++i)
+  {
+    for (int32_t j = 0; j < shapes[i].dim(0); ++j)
+    {
+      for (int32_t k = 0; k < shapes[i].dim(1); ++k)
+      {
+        for (int32_t l = 0; l < shapes[i].dim(2); ++l)
+        {
+          for (int32_t m = 0; m < shapes[i].dim(3); ++m)
+          {
+            Coordinates input_coords;
+            Coordinates output_coords;
+            if (inputs[i]->layout() == Layout::NHWC)
+            {
+              input_coords = Coordinates{j, k, l, m};
+            }
+            else
+            {
+              input_coords = Coordinates{j, m, k, l};
+            }
+            if (outputs[i]->layout() == Layout::NHWC)
+            {
+              output_coords = Coordinates{j, k, l, m};
+            }
+            else
+            {
+              output_coords = Coordinates{j, m, k, l};
+            }
+            float result = *reinterpret_cast<float *>(outputs[i]->buffer() +
+                                                      outputs[i]->calcOffset(output_coords));
+            uint8_t qasymm8 = *reinterpret_cast<uint8_t *>(inputs[i]->buffer() +
+                                                           inputs[i]->calcOffset(input_coords));
+            // Reference value: dequantize the input element with the same scale/zero point
+            float expected = (qasymm8 - zero_point) * scale;
+            EXPECT_EQ(result, expected);
+          }
+        }
+      }
+    }
+  }
+}
+
+} // namespace
void ParallelScheduler::finish()
{
- for (auto &itr : _thread_pools)
+ for (auto &&itr : _thread_pools)
{
itr.second->finish();
}
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SingleModelExecutors.h"
+
+#include "../backend/builtin/IOTensor.h"
+
+namespace onert
+{
+namespace exec
+{
+
+// Stores `exec` keyed by its subgraph index. The model index argument is not used.
+void SingleModelExecutors::emplace(const ir::ModelIndex &, const ir::SubgraphIndex &subg_index,
+                                   std::unique_ptr<IExecutor> exec)
+{
+  _executors.emplace(subg_index, std::move(exec));
+}
+
+// Looks up the executor stored for `subg_index`. The model index argument is not used.
+IExecutor *SingleModelExecutors::at(const ir::ModelIndex &,
+                                    const ir::SubgraphIndex &subg_index) const
+{
+  const auto &executor = _executors.at(subg_index);
+  return executor.get();
+}
+
+// Number of input tensors reported by the entry executor
+uint32_t SingleModelExecutors::inputSize() const
+{
+  const auto &input_tensors = entryExecutor()->getInputTensors();
+  return input_tensors.size();
+}
+
+// Number of output tensors reported by the entry executor
+uint32_t SingleModelExecutors::outputSize() const
+{
+  const auto &output_tensors = entryExecutor()->getOutputTensors();
+  return output_tensors.size();
+}
+
+// orig_info() of the entry executor's input tensor at position `index`
+const ir::OperandInfo &SingleModelExecutors::inputInfo(const ir::IOIndex &index) const
+{
+  const auto &input_tensors = entryExecutor()->getInputTensors();
+  const auto &tensor = input_tensors.at(index.value());
+  return tensor->orig_info();
+}
+
+// orig_info() of the entry executor's output tensor at position `index`
+const ir::OperandInfo &SingleModelExecutors::outputInfo(const ir::IOIndex &index) const
+{
+  const auto &output_tensors = entryExecutor()->getOutputTensors();
+  const auto &tensor = output_tensors.at(index.value());
+  return tensor->orig_info();
+}
+
+// Execution is delegated entirely to the entry executor
+void SingleModelExecutors::execute(const IODescription &desc)
+{
+  entryExecutor()->execute(desc);
+}
+
+} // namespace exec
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
+#define __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
+
+#include "exec/IExecutors.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executor set for single model NN package
+ *
+ * Executors are stored per subgraph index only. The model index parameters of
+ * emplace() and at() are left unnamed, and therefore unused, in the implementation file.
+ */
+class SingleModelExecutors : public IExecutors
+{
+public:
+ /**
+ * @brief Construct a new SingleModelExecutors object
+ */
+ SingleModelExecutors(void) = default;
+ // Not copyable; move construction is allowed
+ SingleModelExecutors(const SingleModelExecutors &) = delete;
+ SingleModelExecutors(SingleModelExecutors &&) = default;
+
+ /**
+ * @brief Destroy the SingleModelExecutors object
+ */
+ ~SingleModelExecutors() = default;
+
+public:
+ /**
+ * @brief Store an executor under its subgraph index
+ * @param[in] model_index Not used by this implementation
+ * @param[in] subg_index Key the executor is stored under
+ * @param[in] exec Executor to store; ownership is taken
+ */
+ void emplace(const ir::ModelIndex &model_index, const ir::SubgraphIndex &subg_index,
+ std::unique_ptr<IExecutor> exec) override;
+
+ /**
+ * @brief Look up the executor stored for a subgraph index
+ * @param[in] model_index Not used by this implementation
+ * @param[in] subg_index Key to look up
+ * @return Non-owning pointer to the stored executor
+ * @note The implementation uses std::unordered_map::at, which throws std::out_of_range
+ * when no executor is stored for subg_index
+ */
+ IExecutor *at(const ir::ModelIndex &model_index,
+ const ir::SubgraphIndex &subg_index) const override;
+
+ /**
+ * @brief Number of input tensors of the executor returned by entryExecutor()
+ * @note entryExecutor() is not declared in this class; presumably inherited from
+ * IExecutors - confirm
+ */
+ uint32_t inputSize() const override;
+
+ /**
+ * @brief Number of output tensors of the executor returned by entryExecutor()
+ */
+ uint32_t outputSize() const override;
+
+ /**
+ * @brief orig_info() of the entry executor's input tensor at position index
+ * @param[in] index Position within the entry executor's input tensor list
+ */
+ const ir::OperandInfo &inputInfo(const ir::IOIndex &index) const override;
+
+ /**
+ * @brief orig_info() of the entry executor's output tensor at position index
+ * @param[in] index Position within the entry executor's output tensor list
+ */
+ const ir::OperandInfo &outputInfo(const ir::IOIndex &index) const override;
+
+ /**
+ * @brief Forward the IO description to the entry executor's execute()
+ * @param[in] desc IO description passed through unchanged
+ */
+ void execute(const IODescription &desc) override;
+
+private:
+ // Owned executors, keyed by subgraph index
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_SINGLE_MODEL_EXECUTORS_H__
void ThreadPool::join()
{
- for (auto &thread : _threads)
+ for (auto &&thread : _threads)
{
thread.join();
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Buffer.h
- * @brief This file contains Buffer interface and InternalBuffer, ExternalBuffer class
- */
-#ifndef __ONERT_INTERP_BUFFER_H__
-#define __ONERT_INTERP_BUFFER_H__
-
-#include <memory>
-
-#include "ir/Data.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Interface for writable data area
- */
-class Buffer : public ir::Data
-{
-public:
- /**
- * @brief Return writable pointer for data area
- * @return Writable pointer
- */
- virtual uint8_t *baseWritable(void) const = 0;
-};
-
-/**
- * @brief Class for internally allocated data area
- */
-class InternalBuffer final : public Buffer
-{
-public:
- InternalBuffer(size_t size) : _base{std::make_unique<uint8_t[]>(size)}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base.get(); }
- uint8_t *baseWritable(void) const override { return _base.get(); }
-
-private:
- std::unique_ptr<uint8_t[]> _base;
- size_t _size;
-};
-
-/**
- * @brief Class for data area from outside
- */
-class ExternalBuffer final : public Buffer
-{
-public:
- ExternalBuffer(uint8_t *base, size_t size) : _base{base}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- size_t size(void) const override { return _size; }
- const uint8_t *base(void) const override { return _base; }
- uint8_t *baseWritable(void) const override { return _base; }
-
-private:
- uint8_t *_base;
- size_t _size;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_BUFFER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file ExecEnv.h
- * @brief This file contains ExecEnv to access interpreter tensor and execution status
- */
-#ifndef __ONERT_INTERP_EXEC_ENV_H_
-#define __ONERT_INTERP_EXEC_ENV_H_
-
-#include <unordered_set>
-
-#include "ir/Graph.h"
-#include "Tensor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Class to gather interpreter execution environment
- * Each interpreter instance own execution environment
- */
-class ExecEnv
-{
-public:
- /**
- * @brief Construct a new Exec Env object (deleted)
- */
- ExecEnv(void) = delete;
- /**
- * @brief Construct a new ExecEnv object
- * @param[in] graph Graph to execute by interpreter
- */
- explicit ExecEnv(const ir::Graph &graph) : _graph(graph)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return graph to execute
- * @return Graph
- */
- const ir::Graph &graph(void) const { return _graph; }
- /**
- * @brief Assign tensor to environment which have allocated or assigned buffer
- * @param[in] index Tensor index
- * @param[in] tensor Tensor
- */
- void assignTensor(const ir::OperandIndex index, std::shared_ptr<ITensor> tensor)
- {
- assert(tensor->bufferRO() != nullptr);
- _tensors.emplace(index, tensor);
- }
-
- /**
- * @brief Return tensor pointer in environment
- * @param[in] index Tensor index
- * can_optional @c True if tensor can be optional input, otherwise @c false
- * @return Tensor pointer
- */
- const ITensor *tensorAt(const ir::OperandIndex index, bool can_optional = false) const
- {
- if (_tensors.find(index) == _tensors.end())
- {
- // It may optional input,
- // otherwise input is not set by runtime user
- if (can_optional)
- {
- return nullptr;
- }
-
- throw std::runtime_error{"ExecEnv: Input is not set"};
- }
-
- return _tensors.at(index).get();
- }
-
- /**
- * @brief Check environment contains tensor
- * @param[in] index Tensor index
- * @return @c true if environment contain tensor, otherwise @c false
- */
- bool contains(const ir::OperandIndex index) const
- {
- return (_tensors.find(index) != _tensors.end());
- }
-
- /**
- * @brief Allocate tensor using operand info
- * @param[in] index Tensor index
- * @param[in] info Operand info
- * @note If already allocated, just return
- * @TODO More smart allocation policy
- */
- void allocateIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info)
- {
- // already allocated, or constant
- if (contains(index))
- {
- return;
- }
-
- // Buffer from external (ex. model output)
- auto tensor = std::make_shared<Tensor>(info);
- if (isExtBuffer(index))
- {
- tensor->setBuffer(_external_buffers.at(index));
- assignTensor(index, tensor);
-
- return;
- }
-
- tensor->setBuffer(std::make_shared<InternalBuffer>(tensor->total_size()));
- assignTensor(index, tensor);
- _buffers.insert(index);
- }
-
- /**
- * @brief Allocate read-only tensor and share data with other tensor
- * @param[in] index Tensor index
- * @param[in] info Operand info
- * @param[in] index_to_share Tensor index that have data to share
- */
- void allocateAndShareIfNeeded(const ir::OperandIndex index, const ir::OperandInfo &info,
- const ir::OperandIndex index_to_share)
- {
- if (!contains(index_to_share))
- {
- throw std::runtime_error{"Cannot find tensor to share data"};
- }
-
- // already allocated
- if (contains(index))
- {
- return;
- }
-
- if (isExtBuffer(index))
- {
- auto tensor = std::make_shared<Tensor>(info);
- tensor->setBuffer(_external_buffers.at(index));
- assignTensor(index, tensor);
- }
- else
- {
- auto tensor = std::make_shared<ROTensor>(info);
- tensor->setData(tensorAt(index_to_share)->shareData());
- assignTensor(index, tensor);
- _buffers.insert(index);
- }
- }
-
- /**
- * @brief Free buffer if allocated by allocateIfNeed
- * @param[in] index Tensor index
- * @note If allocated by outside, just return
- */
- void freeIfAllocated(const ir::OperandIndex index)
- {
- if (_buffers.find(index) != _buffers.end())
- {
- _tensors.at(index)->releaseData();
- }
- }
-
- /**
- * @brief Assign ExternalBuffer into external buffer map
- * @param[in] index Tensor index
- * @param[in] buffer External buffer
- */
- void assignExternalBuffer(const ir::OperandIndex index, std::shared_ptr<ExternalBuffer> buffer)
- {
- _external_buffers.emplace(index, buffer);
- }
-
-private:
- bool isExtBuffer(const ir::OperandIndex index)
- {
- return (_external_buffers.find(index) != _external_buffers.end());
- }
-
-private:
- const ir::Graph &_graph;
- // Tensor map to use in interpreter
- // It should map tensors that have allocated or assigned buffer pointer
- std::unordered_map<ir::OperandIndex, std::shared_ptr<ITensor>> _tensors;
- // Tensors allocated by allocateIfNeed (buffer)
- std::unordered_set<ir::OperandIndex> _buffers;
- // Tensor buffer from external
- std::unordered_map<ir::OperandIndex, std::shared_ptr<ExternalBuffer>> _external_buffers;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_EXEC_ENV_H_
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "InterpExecutor.h"
-
-#include "ExecEnv.h"
-#include "Interpreter.h"
-
-#include "util/logging.h"
-
-#include <memory>
-
-namespace onert
-{
-namespace interp
-{
-
-void InterpExecutor::execute(const exec::IODescription &desc)
-{
- /************************************************************************
- * Prepare execution model (submodel)
- It may execute divided model
- but now consider model inference is done at interpreter
- ***********************************************************************/
- ir::OperandIndexMap<std::shared_ptr<ITensor>> tensor_map;
-
- for (uint32_t n = 0; n < _graph.getInputs().size(); n++)
- {
- ir::IOIndex index{n};
- const auto input_index = _graph.getInputs().at(index);
-
- const auto input = desc.inputs.at(n).get();
- if (input == nullptr)
- {
- // Optional input
- continue;
- }
-
- auto input_tensor = std::make_shared<ROTensor>(input->info);
- input_tensor->setData(std::make_shared<const ir::ExternalData>(
- reinterpret_cast<const uint8_t *>(input->buffer), input->size));
- tensor_map[input_index] = input_tensor;
- }
-
- /************************************************************************
- * Prepare execution environment
- Execution environment will be assigned to invoked interpreter instance
- ***********************************************************************/
-
- std::unique_ptr<ExecEnv> interp_env = std::make_unique<ExecEnv>(_graph);
-
- // Assign input/output tensor into interpreter execution environment
- for (auto index : _graph.getInputs())
- {
- if (tensor_map.find(index) != tensor_map.end())
- {
- VERBOSE(INTERPRETER) << "Assign input tensor. operand index:" << index << std::endl;
- interp_env->assignTensor(index, tensor_map.at(index));
- }
- }
-
- for (uint32_t n = 0; n < _graph.getOutputs().size(); n++)
- {
- ir::IOIndex index{n};
- const auto output_index = _graph.getOutputs().at(index);
- const auto output = desc.outputs.at(n).get();
- if (output == nullptr)
- {
- // Optional output
- continue;
- }
-
- VERBOSE(INTERPRETER) << "Set out buffer to ExecEnv. operand index:" << output_index.value()
- << std::endl;
-
- interp_env->assignExternalBuffer(
- output_index,
- std::make_shared<ExternalBuffer>(reinterpret_cast<uint8_t *>(output->buffer), output->size));
- }
-
- // Allocate constant tensor
- _graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (obj.isConstant())
- {
- VERBOSE(INTERPRETER) << "Allocate and assign constant tensor. operand index:" << ind
- << std::endl;
-
- assert(obj.data());
- auto const_tensor = std::make_shared<ROTensor>(obj.info());
- // Assume that interpreter's tensor layout is same with model (NHWC)
- const_tensor->setData(
- std::make_shared<ir::ExternalData>(obj.data()->base(), obj.info().total_size()));
- interp_env->assignTensor(ind, const_tensor);
- }
- });
-
- /*****************************************************************************
- * Invoke interpreter
- ****************************************************************************/
-
- interp::Interpreter interp(std::move(interp_env));
- interp.run();
-
- /*****************************************************************************
- * Invoked interpreter run is finished
- ****************************************************************************/
-
- // If interpreter execute submodel
- // 1. Get tensor output of submodel into tensor_map to save result
- // 2. Generate new ExecEnv for next interpretation
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file InterpExecutor.h
- * @brief This file contains InterpExecutor class\n
- * to manage interpreter execution and environment
- */
-#ifndef __ONERT_INTERP_INTERP_EXECUTOR_H__
-#define __ONERT_INTERP_INTERP_EXECUTOR_H__
-
-#include "ir/OperandIndexMap.h"
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-class ITensor;
-
-/**
- * @brief Class to execute model using interpreter
- */
-class InterpExecutor final : public exec::IExecutor
-{
-public:
- explicit InterpExecutor(const ir::Graph &graph) : _graph(graph)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return graph object
- * @return Graph object
- */
- const ir::Graph &graph() final { return _graph; }
-
- const ir::Graph &parent_graph() final
- {
- throw new std::runtime_error{"Interpreter does not support this function."};
- }
- void setIndexedRanks(std::shared_ptr<ir::OperationIndexMap<int64_t>>) override{
- // Not implemented
- };
- /**
- * @brief Start execution
- * @note It should be called after setting input and output buffer
- */
- void execute(const exec::IODescription &desc) final;
- void execute(const std::vector<backend::IPortableTensor *> &,
- const std::vector<backend::IPortableTensor *> &) final
- {
- throw new std::runtime_error{"Interpreter does not support subgraph calls(control flow ops)"};
- }
- const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const final
- {
- throw new std::runtime_error{"Interpreter does not support this function."};
- }
-
-private:
- /**
- * @brief Copy of target graph for lowering
- * @note It uses copy of graph, not reference.
- * Original graph may be deallocated by frontend.
- */
- const ir::Graph _graph;
- ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_INTERP_EXECUTOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "InterpExecutor.h"
-
-#include "exec/Execution.h"
-#include "ir/Graph.h"
-#include "ir/operation/BinaryArithmetic.h"
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-namespace
-{
-
-using namespace onert::ir;
-using InterpExecutor = onert::interp::InterpExecutor;
-using Execution = onert::exec::Execution;
-using Executors = onert::exec::Executors;
-
-class InterpExecutorTest : public ::testing::Test
-{
-protected:
- virtual void SetUp() {}
- void CreateSimpleModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->verify();
-
- auto model = std::make_shared<onert::ir::Model>();
- model->push(onert::ir::SubgraphIndex{0}, _graph);
-
- _executors = std::make_shared<Executors>();
- _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
- }
-
- void CreateTwoStepModel()
- {
- // Model: two elementwise add operation
- // model input: lhs, rhs1
- // model output: second add result (result2)
- // constant: rhs2
- // result1 <= (lhs + rhs)
- // result2 <= (result1 + rhs2)
- // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // 1st add operands (result1 <= lhs + rhs1)
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- static int32_t rhs2_data[4] = {3, 1, -1, 5};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs1 = _graph->addOperand(shape, type);
- auto operand_result1 = _graph->addOperand(shape, type);
- auto operand_rhs2 = _graph->addOperand(shape, type);
- auto operand_result2 = _graph->addOperand(shape, type);
- _graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
-
- // 2nd add operations (result2 <= result1 + rhs2)
-
- operation::BinaryArithmetic::Param param1;
- param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param1.activation = Activation::NONE;
- auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
- auto output_set1 = OperandIndexSequence{operand_result1};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
-
- operation::BinaryArithmetic::Param param2;
- param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param2.activation = Activation::NONE;
- auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
- auto output_set2 = OperandIndexSequence{operand_result2};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs1);
- _graph->getOutputs().append(operand_result2);
-
- _graph->verify();
-
- auto model = std::make_shared<onert::ir::Model>();
- model->push(onert::ir::SubgraphIndex{0}, _graph);
-
- _executors = std::make_shared<Executors>();
- _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
- }
-
- void CreateUnspecifiedDimensionsModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, unknown, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 0, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
-
- auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
- _graph->operands()
- .at(operand_activation)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
-
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->verify();
-
- auto model = std::make_shared<onert::ir::Model>();
- model->push(onert::ir::SubgraphIndex{0}, _graph);
-
- _executors = std::make_shared<Executors>();
- _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
- }
-
- void createExecution() { _execution = std::make_unique<Execution>(_executors); }
-
- virtual void TearDown() { _executors = nullptr; }
-
- std::shared_ptr<Graph> _graph{nullptr};
- std::shared_ptr<Executors> _executors{nullptr};
- std::unique_ptr<Execution> _execution{nullptr};
- const int32_t _activation_value{0};
-};
-
-TEST_F(InterpExecutorTest, create_empty)
-{
- Graph graph;
- graph.verify();
- auto executor = std::make_unique<InterpExecutor>(graph);
- ASSERT_NE(executor, nullptr);
-}
-
-TEST_F(InterpExecutorTest, create_simple)
-{
- CreateSimpleModel();
- ASSERT_NE(_executors, nullptr);
- ASSERT_NE(_executors->at(onert::ir::SubgraphIndex{0}), nullptr);
-}
-
-TEST_F(InterpExecutorTest, neg_setInput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setOutput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setInputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setOutputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, execute)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 2);
- EXPECT_EQ(output_buffer[1], -3);
- EXPECT_EQ(output_buffer[2], 1);
- EXPECT_EQ(output_buffer[3], -6);
-}
-
-TEST_F(InterpExecutorTest, executeTwoStep)
-{
- CreateTwoStepModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 5);
- EXPECT_EQ(output_buffer[1], -2);
- EXPECT_EQ(output_buffer[2], 0);
- EXPECT_EQ(output_buffer[3], -1);
-}
-
-} // namespace
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef INTERP_OP
-#error Define INTERP_OP before including this file
-#endif
-
-// Supported operation name in interpreter
-//
-// Same list with Operations.lst
-// Make comment out if operation is not supported in interpreter
-INTERP_OP(BinaryArithmetic)
-//INTERP_OP(BatchToSpaceND)
-//INTERP_OP(Cast)
-INTERP_OP(Conv2D)
-INTERP_OP(DepthwiseConv2D)
-INTERP_OP(Pool2D)
-INTERP_OP(Concat)
-INTERP_OP(FullyConnected)
-//INTERP_OP(Reduce)
-INTERP_OP(Reshape)
-INTERP_OP(Softmax)
-//INTERP_OP(Squeeze)
-//INTERP_OP(Slice)
-//INTERP_OP(StridedSlice)
-INTERP_OP(ElementwiseActivation)
-//INTERP_OP(Transpose)
-//INTERP_OP(Exp)
-//INTERP_OP(Comparison)
-//INTERP_OP(LogicalNot)
-//INTERP_OP(LSTM)
-//INTERP_OP(RSQRT)
-//INTERP_OP(ResizeBilinear)
-//INTERP_OP(RNN)
-//INTERP_OP(Floor)
-//INTERP_OP(SpaceToBatchND)
-//INTERP_OP(SpaceToDepth)
-//INTERP_OP(EmbeddingLookup)
-//INTERP_OP(L2Normalization)
-//INTERP_OP(HashtableLookup)
-INTERP_OP(InstanceNorm)
-//INTERP_OP(PReLU)
-INTERP_OP(TransposeConv)
-//INTERP_OP(SQRT)
-//INTERP_OP(SquaredDifference)
-//INTERP_OP(TopKV2)
-INTERP_OP(Gather)
-//INTERP_OP(Neg)
-//INTERP_OP(Abs)
-//INTERP_OP(ArgMax)
-//INTERP_OP(Dequantize)
-//INTERP_OP(LocalResponseNormalization)
-//INTERP_OP(DepthToSpace)
-//INTERP_OP(Pack)
-//INTERP_OP(Split)
-//INTERP_OP(Unpack)
-INTERP_OP(Pad)
-//INTERP_OP(Custom)
-//INTERP_OP(Permute)
-//INTERP_OP(OneHot)
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Interpreter.h"
-
-#include <stack>
-#include <unordered_set>
-
-#include "Registration.h"
-
-#include "ir/OperandIndexMap.h"
-#include "util/logging.h"
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace interp
-{
-
-// TODO more structured execution kernel implementation
-// TODO use cker for execution
-// TODO divide tensor prepare and execution
-// TODO introduce memory manager (buffer allocate and free)
-class OperationExecutor
-{
-public:
- OperationExecutor(ExecEnv *env) : _env{env}
- {
-#define INTERP_OP(InternalName) _kernels[ir::OpCode::InternalName] = get##InternalName();
-#include "InterpOps.lst"
-#undef INTERP_OP
- }
-
- void execute(const ir::OperationIndex &idx)
- {
- const ir::Operation &node = _env->graph().operations().at(idx);
- const auto nodeName = node.name();
- VERBOSE(INTERPRETER) << "Prepare output operands and execute " << nodeName
- << " operation (id: " << idx << ")" << std::endl;
-
- const auto nodeOpCode = node.opcode();
- if (_kernels.find(nodeOpCode) == _kernels.end())
- {
- throw std::runtime_error{"Interpreter: Operation " + nodeName + " is not yet implemented"};
- }
-
- if (_kernels[nodeOpCode]->prepare != nullptr)
- {
- _kernels[nodeOpCode]->prepare(_env, node);
- }
- _kernels[nodeOpCode]->invoke(_env, node);
- }
-
-private:
- ExecEnv *_env;
- std::unordered_map<ir::OpCode, OpKernel *> _kernels;
-};
-
-/**
- * @brief Run interpreter until there is no operation to execute
- *
- * Ready operands (model inputs, constants, then outputs of executed operations)
- * are kept on a stack. Popping an operand marks it as ready and triggers every
- * operation that uses it and whose inputs are all ready. Each triggered
- * operation is executed, its outputs are pushed as new ready operands, and
- * input buffers that no unexecuted operation still uses are released.
- */
-void Interpreter::run()
-{
- VERBOSE(INTERPRETER) << "Interpreter is invoked " << std::endl;
-
- // operand_stack: save operands prepared to use
- std::stack<ir::OperandIndex> operand_stack;
-
- // Note: We should push input first, then constant.
- // We use use-def for find operators ready to execution,
- // but Use-Def cannot handle parameters (maybe constant, but not always)
- // Note: If all model inputs are constant, it may not work (depend on tensors' order).
- // But that scenario may not exist
- for (auto ind : _env->graph().getInputs())
- {
- VERBOSE(INTERPRETER) << "Input: Push to operand stack " << ind << std::endl;
-
- operand_stack.push(ind);
- }
-
- // Constants are available from the start, so they are seeded as ready operands too
- _env->graph().operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
- if (obj.isConstant())
- {
- VERBOSE(INTERPRETER) << "Constant: Push to operand stack " << ind << std::endl;
-
- operand_stack.push(ind);
- }
- });
-
- // Execution
- // ready_check: operands already popped from the stack (i.e. usable as inputs)
- // executed: operations that have already been run
- std::unordered_set<ir::OperandIndex> ready_check;
- std::unordered_set<ir::OperationIndex> executed;
- OperationExecutor executor{_env.get()};
- while (!operand_stack.empty())
- {
- const auto current_operand_index = operand_stack.top();
- operand_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operand " << current_operand_index.value()
- << " is checked ready to use" << std::endl;
-
- // Each operand is expected to become ready exactly once
- assert(ready_check.find(current_operand_index) == ready_check.end());
- ready_check.insert(current_operand_index);
-
- // Find prepared operations by scan use of current operand
- std::stack<ir::OperationIndex> operation_stack;
- const auto use_operators = _env->graph().operands().at(current_operand_index).getUses();
- for (const auto &use_operator : use_operators)
- {
- // Assumption: all parameters are ready to use
- bool operator_ready = true;
- for (auto input_index : _env->graph().operations().at(use_operator).getInputs())
- {
- // A single input that has not been popped yet keeps the operation waiting
- if (ready_check.find(input_index) == ready_check.end())
- {
- operator_ready = false;
- break;
- }
- }
-
- if (operator_ready)
- {
- VERBOSE(INTERPRETER) << "Ready to execute operation " << use_operator << std::endl;
- operation_stack.push(use_operator);
- }
- }
-
- // Run every operation that became ready because of the current operand
- while (!operation_stack.empty())
- {
- const auto current_operation_index = operation_stack.top();
- operation_stack.pop();
- VERBOSE(INTERPRETER) << "Poped operation: " << current_operation_index << "("
- << _env->graph().operations().at(current_operation_index).name() << ")"
- << std::endl;
-
- // execution
- // 1. Prepare output tensor
- // 2. Call operation kernel
- executor.execute(current_operation_index);
- executed.insert(current_operation_index);
-
- // 3. Push each output into operand stack
- const auto def_operands = _env->graph().operations().at(current_operation_index).getOutputs();
- for (auto def_operand : def_operands)
- {
- VERBOSE(INTERPRETER) << "Buffer: Push to operand stack " << def_operand.value()
- << std::endl;
- operand_stack.push(def_operand);
- }
-
- // 4. Free if lifetime of buffer operands used by input is finished
- for (auto input_index : _env->graph().operations().at(current_operation_index).getInputs())
- {
- const auto use_operators = _env->graph().operands().at(input_index).getUses();
- // The buffer is dead only when every operation using this input has been executed
- bool dead_buffer = true;
- for (const auto &use_operator : use_operators)
- {
- if (executed.find(use_operator) == executed.end())
- {
- dead_buffer = false;
- break;
- }
- }
-
- if (dead_buffer)
- {
- _env->freeIfAllocated(input_index);
- }
- }
- }
- }
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Interpreter.h
- * @brief This file contains Interpreter class for interpretation
- */
-#ifndef __ONERT_INTERP_INTERPRETER_H__
-#define __ONERT_INTERP_INTERPRETER_H__
-
-#include "ExecEnv.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Interpreter that owns an execution environment and runs its graph
- */
-class Interpreter
-{
-public:
-  /**
-   * @brief Default construction is not allowed: an execution environment is required
-   */
-  Interpreter() = delete;
-
-  /**
-   * @brief Construct a new Interpreter object
-   * @param[in] env Execution environment; ownership is moved into the interpreter
-   */
-  Interpreter(std::unique_ptr<ExecEnv> env) : _env{std::move(env)} {}
-
-  /**
-   * @brief Run interpreter until there is no operation to execute
-   */
-  void run();
-
-private:
-  // Execution environment used by run()
-  std::unique_ptr<ExecEnv> _env;
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_INTERPRETER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_INTERP_REGISTRATION_H__
-#define __ONERT_INTERP_REGISTRATION_H__
-
-#include "ExecEnv.h"
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Pair of callbacks implementing one operation for the interpreter
- */
-struct OpKernel
-{
- // Preparation step; receives a mutable environment. The callback may be left empty.
- std::function<void(ExecEnv *, const ir::Operation &)> prepare;
- // Computation step; receives a read-only environment.
- std::function<void(const ExecEnv *, const ir::Operation &)> invoke;
-};
-
-// Declare one `OpKernel *get<InternalName>()` accessor for each INTERP_OP(...) entry
-// expanded from InterpOps.lst (presumably one entry per supported operation).
-// Defined in operations/ directory
-#define INTERP_OP(InternalName) OpKernel *get##InternalName();
-#include "InterpOps.lst"
-#undef INTERP_OP
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_REGISTRATION_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Tensor.h"
-
-#define NO_USE(a) (void)(a)
-
-namespace onert
-{
-namespace interp
-{
-
-// Hand this tensor to the given callback
-void ITensor::access(const std::function<void(backend::ITensor &tensor)> &fn)
-{
-  backend::ITensor &self = *this;
-  fn(self);
-}
-
-/**
- * @brief Offset calculation is not implemented for the interpreter's read-only tensor
- * @param[in] coords Element coordinates (ignored)
- * @throw std::runtime_error always
- */
-size_t ROTensor::calcOffset(const ir::Coordinates & /* coords */) const
-{
-  // The message names this function and class so the failure can be traced to its origin
-  throw std::runtime_error("calcOffset is not supported for interp::ROTensor now.");
-}
-
-/**
- * @brief Offset calculation is not implemented for the interpreter's writable tensor
- * @param[in] coords Element coordinates (ignored)
- * @throw std::runtime_error always
- */
-size_t Tensor::calcOffset(const ir::Coordinates & /* coords */) const
-{
-  // The message names this function and class so the failure can be traced to its origin
-  throw std::runtime_error("calcOffset is not supported for interp::Tensor now.");
-}
-
-// Layout reported by a read-only tensor: always NHWC
-ir::Layout ROTensor::layout() const
-{
-  // TODO Return the frontend layout instead of a fixed value
-  const ir::Layout fixed_layout = ir::Layout::NHWC;
-  return fixed_layout;
-}
-
-// Layout reported by a writable tensor: always NHWC
-ir::Layout Tensor::layout() const
-{
-  // TODO Return the frontend layout instead of a fixed value
-  const ir::Layout fixed_layout = ir::Layout::NHWC;
-  return fixed_layout;
-}
-
-// Shape taken from the operand info given at construction
-ir::Shape Tensor::getShape() const
-{
-  return _info.shape();
-}
-
-// Shape taken from the operand info given at construction
-ir::Shape ROTensor::getShape() const
-{
-  return _info.shape();
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Tensor.h
- * @brief This file contains ITensor interface, ROTensor class, and Tensor class
- */
-#ifndef __ONERT_INTERP_TENSOR_H__
-#define __ONERT_INTERP_TENSOR_H__
-
-#include "Buffer.h"
-
-#include "ir/OperandInfo.h"
-#include "backend/ITensor.h"
-#include "ir/Layout.h"
-
-namespace onert
-{
-namespace interp
-{
-
-/**
- * @brief Interface to handle Tensor in interpreter
- *
- * Extends backend::ITensor with buffer/data sharing and tensor-info accessors.
- */
-class ITensor : public backend::ITensor
-{
-public:
- virtual ~ITensor() = default;
-
-public:
- /**
- * @brief Return writable buffer pointer
- * @return Writable buffer pointer
- */
- virtual uint8_t *buffer() const = 0;
- /**
- * @brief Return shared pointer for buffer
- * @return Buffer shared pointer
- */
- virtual std::shared_ptr<const Buffer> shareBuffer() const = 0;
- /**
- * @brief Return read-only buffer pointer
- * @return Read-only buffer pointer
- */
- virtual const uint8_t *bufferRO() const = 0;
- /**
- * @brief Return shared pointer for data
- * @return Data shared pointer
- */
- virtual std::shared_ptr<const ir::Data> shareData() const = 0;
- /**
- * @brief Set internal/external buffer
- * @param[in] buffer Buffer pointer
- */
- virtual void setBuffer(std::shared_ptr<const Buffer> buffer) = 0;
- /**
- * @brief Set data reference (including constant, input)
- * @param[in] data Data pointer
- */
- virtual void setData(std::shared_ptr<const ir::Data> data) = 0;
- /**
- * @brief Drop the reference to the buffer/data held by this tensor
- */
- virtual void releaseData() = 0;
-
- /**
- * @brief Return total size of tensor
- * @return Total size (presumably in bytes - TODO confirm)
- */
- virtual size_t total_size() const = 0;
- /**
- * @brief Calculate offset of the element at given coordinates
- * @param[in] coords Element coordinates
- * @return Offset of the element
- */
- virtual size_t calcOffset(const ir::Coordinates &coords) const = 0;
-
- /**
- * @brief Return whether the tensor has padding
- * @return @c true if the tensor has padding, otherwise @c false
- */
- virtual bool has_padding() const = 0;
- /**
- * @brief Return data type of tensor
- * @return Data type of tensor
- */
- virtual ir::DataType data_type() const = 0;
- /**
- * @brief Return TensorInfo
- * @return TensorInfo
- */
- virtual const ir::OperandInfo &tensorInfo() const = 0;
- /**
- * @brief Return number of elements
- * @return Number of elements
- */
- virtual uint64_t num_elements() const = 0;
- /**
- * @brief Call given function with this tensor as its argument
- * @param[in] fn Function to call
- */
- void access(const std::function<void(backend::ITensor &tensor)> &fn) final;
-};
-
-/**
- * @brief Interpreter tensor that only exposes read access to its data
- */
-class ROTensor final : public ITensor
-{
-public:
-  ROTensor() = delete;
-  ROTensor(const ir::OperandInfo &info) : _info(info) {}
-
-public:
-  // Writable views are rejected for a read-only tensor
-  uint8_t *buffer() const override
-  {
-    throw std::runtime_error{"Read only tensor"};
-  }
-  std::shared_ptr<const Buffer> shareBuffer() const override
-  {
-    throw std::runtime_error{"Read only tensor"};
-  }
-
-  // Read access to, and management of, the referenced data
-  const uint8_t *bufferRO() const override
-  {
-    return _data->base();
-  }
-  std::shared_ptr<const ir::Data> shareData() const override
-  {
-    return _data;
-  }
-  void setBuffer(std::shared_ptr<const Buffer> buffer) override
-  {
-    _data = buffer;
-  }
-  void setData(std::shared_ptr<const ir::Data> data) override
-  {
-    _data = data;
-  }
-  void releaseData() override
-  {
-    _data = nullptr;
-  }
-
-  // Size, layout and flags
-  size_t total_size() const override
-  {
-    return _info.total_size();
-  }
-  size_t calcOffset(const ir::Coordinates &coords) const override;
-  ir::Layout layout() const override;
-  bool is_dynamic() const override
-  {
-    return false;
-  }
-  bool has_padding() const override
-  {
-    return false;
-  }
-
-  // Type and quantization parameters, all read from the operand info
-  ir::DataType data_type() const override
-  {
-    return _info.typeInfo().type();
-  }
-  float data_scale() const override
-  {
-    return _info.typeInfo().scale();
-  }
-  int32_t data_zero_point() const override
-  {
-    return _info.typeInfo().zero_point();
-  }
-  const std::vector<float> &data_scales() const override
-  {
-    return _info.typeInfo().scales();
-  }
-  const std::vector<int32_t> &data_zero_points() const override
-  {
-    return _info.typeInfo().zero_points();
-  }
-
-  // Operand info and values derived from its shape
-  const ir::OperandInfo &tensorInfo() const override
-  {
-    return _info;
-  }
-  uint64_t num_elements() const override
-  {
-    return _info.shape().num_elements();
-  }
-  ir::Shape getShape() const override;
-
-private:
-  const ir::OperandInfo _info;
-  std::shared_ptr<const ir::Data> _data{nullptr};
-};
-
-/**
- * @brief Interpreter tensor backed by a buffer that can be written through buffer()
- */
-class Tensor final : public ITensor
-{
-public:
-  Tensor() = delete;
-  Tensor(const ir::OperandInfo &info) : _info(info) {}
-
-public:
-  // Access to, and management of, the backing buffer
-  uint8_t *buffer() const override
-  {
-    return _buffer->baseWritable();
-  }
-  std::shared_ptr<const Buffer> shareBuffer() const override
-  {
-    return _buffer;
-  }
-  const uint8_t *bufferRO() const override
-  {
-    return _buffer->base();
-  }
-  std::shared_ptr<const ir::Data> shareData() const override
-  {
-    return _buffer;
-  }
-  void setBuffer(std::shared_ptr<const Buffer> buffer) override
-  {
-    _buffer = buffer;
-  }
-  // Plain data references are rejected
-  void setData(std::shared_ptr<const ir::Data>) override
-  {
-    throw std::runtime_error{"Passed data may read-only"};
-  }
-  void releaseData() override
-  {
-    _buffer = nullptr;
-  }
-
-  // Size, layout and flags
-  size_t total_size() const override
-  {
-    return _info.total_size();
-  }
-  size_t calcOffset(const ir::Coordinates &coords) const override;
-  ir::Layout layout() const override;
-  bool is_dynamic() const override
-  {
-    return false;
-  }
-  bool has_padding() const override
-  {
-    return false;
-  }
-
-  // Type and quantization parameters, all read from the operand info
-  ir::DataType data_type() const override
-  {
-    return _info.typeInfo().type();
-  }
-  float data_scale() const override
-  {
-    return _info.typeInfo().scale();
-  }
-  int32_t data_zero_point() const override
-  {
-    return _info.typeInfo().zero_point();
-  }
-  const std::vector<float> &data_scales() const override
-  {
-    return _info.typeInfo().scales();
-  }
-  const std::vector<int32_t> &data_zero_points() const override
-  {
-    return _info.typeInfo().zero_points();
-  }
-
-  // Operand info and values derived from its shape
-  const ir::OperandInfo &tensorInfo() const override
-  {
-    return _info;
-  }
-  uint64_t num_elements() const override
-  {
-    return _info.shape().num_elements();
-  }
-  ir::Shape getShape() const override;
-
-private:
-  const ir::OperandInfo _info;
-  std::shared_ptr<const Buffer> _buffer{nullptr};
-};
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_TENSOR_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/BinaryArithmetic.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-#include <cker/Types.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-// Arithmetic operation selector used as a template argument by the kernels in this file
-enum class OpType
-{
- ADD,
- SUB,
- MUL
-};
-
-/**
- * @brief Check operand types and allocate the output tensor of a BinaryArithmetic node
- * @param[in] env  Execution environment holding the tensors
- * @param[in] node Operation node; must be an ir::operation::BinaryArithmetic
- * @throw std::runtime_error if input types differ, broadcasting fails,
- *        or the output type differs from the input type
- */
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
-  const auto &binary_node =
-    nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-
-  const auto lhs_idx = node.getInputs().at(binary_node.LHS);
-  const auto rhs_idx = node.getInputs().at(binary_node.RHS);
-  const auto out_idx = node.getOutputs().at(0);
-
-  const auto lhs = env->tensorAt(lhs_idx);
-  const auto rhs = env->tensorAt(rhs_idx);
-
-  // Both inputs must have the same data type
-  // TODO Util function to compare TensorInfo
-  if (lhs->data_type() != rhs->data_type())
-  {
-    throw std::runtime_error{"Interp(" + binary_node.name() + "): Different input types"};
-  }
-
-  // Allocation is skipped by the environment when the tensor already exists (ex. model output)
-  const auto &lhs_info = lhs->tensorInfo();
-  const auto &rhs_info = rhs->tensorInfo();
-  if (lhs_info.shape() != rhs_info.shape())
-  {
-    // Shapes differ: the output takes the broadcast shape and the input's type
-    bool broadcast_ok = true;
-    auto broadcast_shape = calcBroadcastShape(lhs_info.shape(), rhs_info.shape(), broadcast_ok);
-    if (!broadcast_ok)
-    {
-      throw std::runtime_error{"Interp(" + binary_node.name() + "): Fail to brodcasting"};
-    }
-
-    auto broadcast_info = ir::OperandInfo::createStaticInfo(broadcast_shape, lhs_info.typeInfo());
-    env->allocateIfNeeded(out_idx, broadcast_info);
-  }
-  else
-  {
-    // Shapes match: the output has the same shape and type as the input
-    auto same_info = lhs_info;
-    env->allocateIfNeeded(out_idx, same_info);
-  }
-
-  // The output type must match the input type
-  // TODO Util function to compare TensorInfo
-  auto out = env->tensorAt(out_idx);
-  if (lhs->data_type() != out->data_type())
-  {
-    throw std::runtime_error{"Interp(" + binary_node.name() + "): Invalid output type"};
-  }
-}
-
-// Store the activation range into the float activation fields of the kernel parameters
-inline void setActivationParams(float min, float max, nnfw::cker::BinaryArithmeticOpParam *params)
-{
- params->float_activation_min = min;
- params->float_activation_max = max;
-}
-
-// Store the activation range into the quantized activation fields of the kernel parameters
-// (overload selected for int32_t values)
-inline void setActivationParams(int32_t min, int32_t max,
- nnfw::cker::BinaryArithmeticOpParam *params)
-{
- params->quantized_activation_min = min;
- params->quantized_activation_max = max;
-}
-
-/**
- * @brief Run an element-wise binary arithmetic kernel on already prepared tensors
- * @tparam raw_type Element type the tensor buffers are reinterpreted as
- * @tparam op_type  Arithmetic operation to perform
- * @param[in] lhs_tensor Left-hand side input tensor
- * @param[in] rhs_tensor Right-hand side input tensor
- * @param[in] out_tensor Output tensor; written through its writable buffer
- * @param[in] param      Node parameters; only the activation is read here
- */
-template <typename raw_type, OpType op_type>
-void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
-            const ir::operation::BinaryArithmetic::Param &param)
-{
-  const auto lhs_buffer = lhs_tensor->bufferRO();
-  const auto rhs_buffer = rhs_tensor->bufferRO();
-  auto out_buffer = out_tensor->buffer();
-
-  // Translate the activation into a [min, max] range stored in the kernel parameters
-  nnfw::cker::BinaryArithmeticOpParam cker_param;
-  raw_type activation_min, activation_max;
-  calculateActivationRange(param.activation, &activation_min, &activation_max);
-  setActivationParams(activation_min, activation_max, &cker_param);
-  const raw_type *lhs_ptr = reinterpret_cast<const raw_type *>(lhs_buffer);
-  const raw_type *rhs_ptr = reinterpret_cast<const raw_type *>(rhs_buffer);
-  raw_type *out_ptr = reinterpret_cast<raw_type *>(out_buffer);
-
-  // Map the local operation selector onto the cker one (used as a template argument below)
-  const auto cker_op_type =
-    (op_type == OpType::ADD) ? nnfw::cker::BinaryArithmeticOpType::ADD
-                             : ((op_type == OpType::SUB) ? nnfw::cker::BinaryArithmeticOpType::SUB
-                                                         : nnfw::cker::BinaryArithmeticOpType::MUL);
-
-  // Convert each shape once; both the broadcast check and the kernels use them
-  const auto lhs_shape = convertShape(lhs_tensor->tensorInfo().shape());
-  const auto rhs_shape = convertShape(rhs_tensor->tensorInfo().shape());
-  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
-  const bool need_broadcast =
-    nnfw::cker::ProcessBroadcastShapes(lhs_shape, rhs_shape, &cker_param);
-
-  if (need_broadcast)
-  {
-    nnfw::cker::BroadcastBinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape,
-                                                          rhs_ptr, out_shape, out_ptr);
-    return;
-  }
-
-  nnfw::cker::BinaryArithmeticOp<cker_op_type>(cker_param, lhs_shape, lhs_ptr, rhs_shape, rhs_ptr,
-                                               out_shape, out_ptr);
-}
-
-/**
- * @brief Look up the node's tensors and dispatch on the element type of the LHS input
- * @tparam op_type Arithmetic operation to perform
- * @throw std::runtime_error if the data type is neither INT32 nor FLOAT32
- */
-template <OpType op_type>
-void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node)
-{
-  const auto lhs = env->tensorAt(node.getInputs().at(node.LHS));
-  const auto rhs = env->tensorAt(node.getInputs().at(node.RHS));
-  const auto out = env->tensorAt(node.getOutputs().at(0));
-
-  switch (lhs->data_type())
-  {
-    case ir::DataType::INT32:
-      invoke<int32_t, op_type>(lhs, rhs, out, node.param());
-      break;
-    case ir::DataType::FLOAT32:
-      invoke<float, op_type>(lhs, rhs, out, node.param());
-      break;
-    default:
-      throw std::runtime_error{"NYI: Unsupported data type"};
-  }
-}
-
-/**
- * @brief Kernel entry point: select the ADD/SUB/MUL implementation from the node's parameter
- * @throw std::runtime_error for any other arithmetic type
- */
-void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node)
-{
-  using ArithmeticType = ir::operation::BinaryArithmetic::ArithmeticType;
-
-  const auto &binary_node =
-    nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
-  const auto arithmetic_type = binary_node.param().arithmetic_type;
-
-  if (arithmetic_type == ArithmeticType::ADD)
-  {
-    invokeBinaryArithmetic<OpType::ADD>(env, binary_node);
-  }
-  else if (arithmetic_type == ArithmeticType::SUB)
-  {
-    invokeBinaryArithmetic<OpType::SUB>(env, binary_node);
-  }
-  else if (arithmetic_type == ArithmeticType::MUL)
-  {
-    invokeBinaryArithmetic<OpType::MUL>(env, binary_node);
-  }
-  else
-  {
-    throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " +
-                             binary_node.name()};
-  }
-}
-
-} // namespace
-
-// Return the BinaryArithmetic kernel; the object is a function-local static created on first use
-OpKernel *getBinaryArithmetic()
-{
-  static OpKernel binary_arithmetic_kernel{prepare, invokeBinaryArithmeticOps};
-  return &binary_arithmetic_kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Concat.h"
-
-#include <cker/operation/Concatenation.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace concat
-{
-
-/**
- * @brief Compute the output shape of a Concat node and allocate its output tensor
- *
- * The output takes the first input's shape and type, except for the concatenation
- * axis whose dimension is the sum of that dimension over all inputs.
- *
- * @param[in] env  Execution environment holding the tensors
- * @param[in] node Operation node; must be an ir::operation::Concat
- * @throw std::runtime_error if the axis is outside [-rank, rank) of the first input
- */
-void prepareConcat(ExecEnv *env, const ir::Operation &node)
-{
-  const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
-
-  const auto first_index = node.getInputs().at(0);
-  const auto out_index = node.getOutputs().at(0);
-
-  const auto first_tensor = env->tensorAt(first_index);
-  const auto first_shape = first_tensor->getShape();
-  const int32_t rank = first_shape.rank();
-
-  // A negative axis counts from the last dimension
-  const int32_t axis_raw = concat_node.param().axis;
-  const int32_t axis = (axis_raw < 0) ? (axis_raw + rank) : axis_raw;
-  // Reject an out-of-range axis here: it is used below to index the output shape
-  if (axis < 0 || axis >= rank)
-  {
-    throw std::runtime_error{"Interp(" + concat_node.name() + "): Invalid axis"};
-  }
-
-  // All inputs shape should be same except axis dimension
-  // All inputs type should be same
-  uint32_t out_axis_dimension = 0;
-  for (auto input : node.getInputs())
-  {
-    const auto input_tensor = env->tensorAt(input);
-    const auto input_shape = input_tensor->getShape();
-    assert(rank == input_shape.rank());
-    assert(first_tensor->data_type() == input_tensor->data_type());
-    for (int32_t i = 0; i < rank; i++)
-    {
-      if (i == axis)
-      {
-        out_axis_dimension += input_shape.dim(i);
-        continue;
-      }
-      assert(first_shape.dim(i) == input_shape.dim(i));
-    }
-  }
-
-  // Make output tensor info using first input tensor info, and accumulated axis dimension value
-  auto out_shape = first_tensor->tensorInfo().shape();
-  out_shape.dim(axis) = out_axis_dimension;
-  env->allocateIfNeeded(
-    out_index, ir::OperandInfo::createStaticInfo(out_shape, first_tensor->tensorInfo().typeInfo()));
-
-  auto out_tensor = env->tensorAt(out_index);
-  UNUSED_RELEASE(out_tensor);
-
-  // Output shape should be same with input except axis dimension
-  // Output type should be same with input
-  assert(first_tensor->data_type() == out_tensor->data_type());
-  for (int32_t i = 0; i < rank; i++)
-  {
-    if (i == axis)
-    {
-      continue;
-    }
-    assert(first_shape.dim(i) == out_tensor->getShape().dim(i));
-  }
-}
-
-/**
- * @brief Concatenate float tensors along the given axis
- * @param[in] in_tensors Input tensors, in concatenation order
- * @param[in] out_tensor Output tensor; written through its writable buffer
- * @param[in] axis       Concatenation axis (already normalized to be non-negative)
- */
-void invoke(const std::vector<const ITensor *> &in_tensors, const ITensor *out_tensor,
-            uint32_t axis)
-{
-  const uint32_t count = in_tensors.size();
-
-  // Calculate
-  nnfw::cker::ConcatenationParams cker_param;
-  cker_param.axis = static_cast<int8_t>(axis);
-  cker_param.inputs_count = count;
-
-  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
-  std::vector<nnfw::cker::Shape> in_shapes;
-  std::vector<const nnfw::cker::Shape *> in_shape_ptrs;
-  std::vector<const float *> in_ptrs;
-  // in_shapes must be reserved up front: in_shape_ptrs stores addresses of its elements,
-  // which a reallocation during push_back would invalidate
-  in_shapes.reserve(count);
-  in_shape_ptrs.reserve(count);
-  in_ptrs.reserve(count);
-  for (uint32_t i = 0; i < count; i++)
-  {
-    in_shapes.push_back(convertShape(in_tensors[i]->tensorInfo().shape()));
-    in_shape_ptrs.push_back(&in_shapes[i]);
-    in_ptrs.push_back(reinterpret_cast<const float *>(in_tensors[i]->bufferRO()));
-  }
-
-  auto out_buffer = out_tensor->buffer();
-  float *out_ptr = reinterpret_cast<float *>(out_buffer);
-
-  nnfw::cker::Concatenation<float>(cker_param, in_shape_ptrs.data(), in_ptrs.data(), out_shape,
-                                   out_ptr);
-}
-
-/**
- * @brief Kernel entry point of Concat: gather tensors, normalize the axis and run the kernel
- * @throw std::runtime_error if the first input is not FLOAT32
- */
-void invokeConcat(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &concat_node = nnfw::misc::polymorphic_downcast<const ir::operation::Concat &>(node);
-  const int32_t raw_axis = concat_node.param().axis;
-
-  // Collect the input tensors in the order given by the node
-  std::vector<const ITensor *> inputs;
-  for (const auto &input_index : concat_node.getInputs())
-  {
-    inputs.emplace_back(env->tensorAt(input_index));
-  }
-
-  const auto output = env->tensorAt(node.getOutputs().at(0));
-
-  // A negative axis counts from the last dimension of the output
-  uint32_t axis;
-  if (raw_axis < 0)
-  {
-    axis = raw_axis + output->getShape().rank();
-  }
-  else
-  {
-    axis = raw_axis;
-  }
-
-  // Only float32 is implemented
-  if (inputs[0]->data_type() != ir::DataType::FLOAT32)
-  {
-    throw std::runtime_error{"NYI: Support float32 only"};
-  }
-
-  invoke(inputs, output, axis);
-}
-} // namespace concat
-
-// Return the Concat kernel; the object is a function-local static created on first use
-OpKernel *getConcat()
-{
-  static OpKernel concat_kernel{concat::prepareConcat, concat::invokeConcat};
-  return &concat_kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Conv2D.h"
-#include "util/ShapeInference.h"
-#include "util/Utils.h"
-
-#include <cker/operation/Conv.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace conv2d
-{
-
-/**
- * @brief Allocate the output tensor of a Conv2D node, inferring its shape when unspecified
- * @param[in] env  Execution environment holding the tensors
- * @param[in] node Operation node; must be an ir::operation::Conv2D when the shape is inferred
- */
-void prepareConv2D(ExecEnv *env, const ir::Operation &node)
-{
-  const auto ifm_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
-  const auto ker_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
-  const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
-  const auto ofm_index = node.getOutputs().at(0);
-
-  const auto ifm_tensor = env->tensorAt(ifm_index);
-  const auto ker_tensor = env->tensorAt(ker_index);
-  const auto bias_tensor = env->tensorAt(bias_index);
-
-  // Expected ranks: 4 for input and kernel, 1 for bias
-  assert(ifm_tensor->getShape().rank() == 4);
-  assert(ker_tensor->getShape().rank() == 4);
-  assert(bias_tensor->getShape().rank() == 1);
-
-  UNUSED_RELEASE(ifm_tensor);
-  UNUSED_RELEASE(ker_tensor);
-  UNUSED_RELEASE(bias_tensor);
-
-  const auto graph_ofm_info = env->graph().operands().at(ofm_index).info();
-  if (graph_ofm_info.total_size() != 0)
-  {
-    // The graph already specifies the output shape
-    env->allocateIfNeeded(ofm_index, graph_ofm_info);
-  }
-  else
-  {
-    // Unspecified output shape: infer it from input shape, kernel shape and node parameters
-    const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
-    const auto inferred_shape = shape_inference::inferConv2DShape(
-      ifm_tensor->tensorInfo().shape(), ker_tensor->tensorInfo().shape(), conv_node.param());
-    env->allocateIfNeeded(
-      ofm_index, ir::OperandInfo::createStaticInfo(inferred_shape, graph_ofm_info.typeInfo()));
-  }
-
-  auto ofm_tensor = env->tensorAt(ofm_index);
-  UNUSED_RELEASE(ofm_tensor);
-
-  // Only the case where input and output share a data type is handled
-  assert(ifm_tensor->data_type() == ofm_tensor->data_type());
-  assert(ofm_tensor->getShape().rank() == 4);
-}
-
-/**
- * @brief Run the float Conv2D kernel on already prepared tensors
- * @param[in] ifm_tensor  Input feature map tensor
- * @param[in] ker_tensor  Kernel tensor
- * @param[in] bias_tensor Bias tensor
- * @param[in] ofm_tensor  Output feature map tensor; written through its writable buffer
- * @param[in] param       Node parameters (padding, stride and activation are read)
- */
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
-            const ITensor *ofm_tensor, const ir::operation::Conv2D::Param &param)
-{
-  // TODO Support NCHW frontend
-  const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
-  const auto &ker_shape = ker_tensor->tensorInfo().shape();
-  const auto ker_height = ker_shape.dim(1);
-  const auto ker_width = ker_shape.dim(2);
-  const auto padding =
-    ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, ker_width, ker_height);
-
-  // Calculate
-  float activation_min, activation_max;
-  calculateActivationRange(param.activation, &activation_min, &activation_max);
-
-  // Fill the kernel parameters; dilation is fixed to 1 here
-  nnfw::cker::ConvParams cker_param;
-  cker_param.padding_type = convertPaddingType(param.padding.type);
-  cker_param.padding_values.width = padding.left;
-  cker_param.padding_values.height = padding.top;
-  cker_param.stride_width = param.stride.horizontal;
-  cker_param.stride_height = param.stride.vertical;
-  cker_param.dilation_width_factor = 1;
-  cker_param.dilation_height_factor = 1;
-  cker_param.float_activation_min = activation_min;
-  cker_param.float_activation_max = activation_max;
-
-  const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
-  const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
-  const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
-  const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
-  // All buffers are interpreted as float
-  const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
-  const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
-  const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
-  float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
-  nnfw::cker::Conv conv_kernel;
-  conv_kernel(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr, cker_bias_shape,
-              bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-/**
- * @brief Kernel entry point of Conv2D: look up the node's tensors and run the float kernel
- * @throw std::runtime_error if the input is not FLOAT32
- */
-void invokeConv2D(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &conv_node = nnfw::misc::polymorphic_downcast<const ir::operation::Conv2D &>(node);
-
-  const auto input_index = node.getInputs().at(ir::operation::Conv2D::INPUT);
-  const auto kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
-  const auto bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
-  const auto output_index = node.getOutputs().at(0);
-
-  const auto input = env->tensorAt(input_index);
-  const auto kernel = env->tensorAt(kernel_index);
-  const auto bias = env->tensorAt(bias_index);
-  const auto output = env->tensorAt(output_index);
-
-  // Only float32 is implemented
-  if (input->data_type() != ir::DataType::FLOAT32)
-  {
-    throw std::runtime_error{"NYI: Support float32 only"};
-  }
-
-  invoke(input, kernel, bias, output, conv_node.param());
-}
-} // namespace conv2d
-
-// Return the Conv2D kernel; the object is a function-local static created on first use
-OpKernel *getConv2D()
-{
-  static OpKernel conv2d_kernel{conv2d::prepareConv2D, conv2d::invokeConv2D};
-  return &conv2d_kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/DepthwiseConv2D.h"
-#include "util/ShapeInference.h"
-#include "util/Utils.h"
-
-#include <cker/operation/DepthwiseConv.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-
-namespace
-{
-
-/**
- * @brief Allocate the output tensor of a DepthwiseConv2D node, inferring its shape when
- *        unspecified
- * @param[in] env  Execution environment holding the tensors
- * @param[in] node Operation node; must be an ir::operation::DepthwiseConv2D when the shape is
- *                 inferred
- */
-void prepareDepthwiseConv(ExecEnv *env, const ir::Operation &node)
-{
-  const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
-  const auto ker_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
-  const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
-  const auto ofm_index = node.getOutputs().at(0);
-
-  const auto ifm_tensor = env->tensorAt(ifm_index);
-  const auto ker_tensor = env->tensorAt(ker_index);
-  const auto bias_tensor = env->tensorAt(bias_index);
-
-  // Expected ranks: 4 for input and kernel, 1 for bias
-  assert(ifm_tensor->getShape().rank() == 4);
-  assert(ker_tensor->getShape().rank() == 4);
-  assert(bias_tensor->getShape().rank() == 1);
-
-  UNUSED_RELEASE(ifm_tensor);
-  UNUSED_RELEASE(ker_tensor);
-  UNUSED_RELEASE(bias_tensor);
-
-  const auto graph_ofm_info = env->graph().operands().at(ofm_index).info();
-  if (graph_ofm_info.total_size() != 0)
-  {
-    // The graph already specifies the output shape
-    env->allocateIfNeeded(ofm_index, graph_ofm_info);
-  }
-  else
-  {
-    // Unspecified output shape: infer it from input shape, kernel shape and node parameters
-    const auto &depthwise_node =
-      nnfw::misc::polymorphic_downcast<const ir::operation::DepthwiseConv2D &>(node);
-    const auto inferred_shape = shape_inference::inferDepthwiseConv2DShape(
-      ifm_tensor->tensorInfo().shape(), ker_tensor->tensorInfo().shape(),
-      depthwise_node.param());
-    env->allocateIfNeeded(
-      ofm_index, ir::OperandInfo::createStaticInfo(inferred_shape, graph_ofm_info.typeInfo()));
-  }
-
-  auto ofm_tensor = env->tensorAt(ofm_index);
-  UNUSED_RELEASE(ofm_tensor);
-
-  // Only the case where input and output share a data type is handled
-  assert(ifm_tensor->data_type() == ofm_tensor->data_type());
-  assert(ofm_tensor->getShape().rank() == 4);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::DepthwiseConv2D::Param ¶m)
-{
- // TODO Support NCHW frontend
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [1, kernel_height, kernel_width, depth_out].
- const auto &ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, ker_width, ker_height);
-
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::DepthwiseConvParams cker_param;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.depth_multiplier = param.multiplier;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- const float *bias_ptr = reinterpret_cast<const float *>(bias_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::DepthwiseConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr, nullptr);
-}
-
-void invokeDepthwiseConv(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node = static_cast<const ir::operation::DepthwiseConv2D &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::DepthwiseConv2D::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
- const auto bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getDepthwiseConv2D()
-{
- static OpKernel kernel = {prepareDepthwiseConv, invokeDepthwiseConv};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/ElementwiseActivation.h"
-
-#include <cker/operation/Logistic.h>
-#include <cker/operation/Tanh.h>
-#include <misc/polymorphic_downcast.h>
-
-#include <cmath>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-enum class ActivationType
-{
- Logistic,
- ReLU,
- Tanh
-};
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
- if (output_info.total_size() == 0)
- {
- // Output's shape and type is same with input
- auto input_info = input_tensor->tensorInfo();
- // We can handle already allocated (ex. model output)
- env->allocateIfNeeded(output_index, input_info);
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- // Check shape and type lhs is same with output
- // TODO Util function to compare TensorInfo
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"};
- }
-}
-
-template <ActivationType act_type>
-void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha,
- float beta)
-{
- std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
- switch (act_type)
- {
- case ActivationType::ReLU:
- fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); };
- break;
- case ActivationType::Tanh:
- fn = [](const float &in) { return std::tanh(in); };
- break;
- default:
- throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"};
- break;
- }
-
- const float *input_end = input_ptr + num_elements;
- for (; input_ptr < input_end; input_ptr++, output_ptr++)
- {
- *output_ptr = fn(*input_ptr);
- }
-}
-
-template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(0);
- const auto output_index = node.getOutputs().at(0);
-
- // Check lhs shape is same with rhs (with broadcast)
- const auto input_tensor = env->tensorAt(input_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- uint64_t elements = input_tensor->num_elements();
- const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
- float *out = reinterpret_cast<float *>(output_tensor->buffer());
- if (act_type == ActivationType::Logistic)
- {
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out);
- }
- else
- {
- const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
- evalFloat<act_type>(input_start, out, elements, act_node.param().alpha,
- act_node.param().beta);
- }
- }
- else
- {
- throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"};
- }
-}
-
-void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &act_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
- switch (act_node.param().op_type)
- {
- case ir::operation::ElementwiseActivation::Type::LOGISTIC:
- invoke<ActivationType::Logistic>(env, node);
- break;
- case ir::operation::ElementwiseActivation::Type::RELU:
- invoke<ActivationType::ReLU>(env, node);
- break;
- case ir::operation::ElementwiseActivation::Type::TANH:
- invoke<ActivationType::Tanh>(env, node);
- break;
- default:
- throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation");
- }
-}
-
-} // namespace
-
-OpKernel *getElementwiseActivation()
-{
- static OpKernel kernel = {prepare, invokeElementwiseActivation};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/FullyConnected.h"
-
-#include <cker/operation/FullyConnected.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace fc
-{
-
-void prepareFC(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
- const auto kernel_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto kernel_tensor = env->tensorAt(kernel_index);
- const auto bias_tensor = env->tensorAt(bias_index);
-
- UNUSED_RELEASE(in_tensor);
- UNUSED_RELEASE(kernel_tensor);
- UNUSED_RELEASE(bias_tensor);
-
- assert(in_tensor->getShape().rank() >= 2);
- assert(kernel_tensor->getShape().rank() == 2);
- assert(bias_tensor->getShape().rank() == 1);
-
- const auto input_size_with_batch = in_tensor->num_elements();
- const auto num_units = kernel_tensor->getShape().dim(0);
- const auto input_size = kernel_tensor->getShape().dim(1);
- const int32_t batch_size = input_size_with_batch / input_size;
- assert(input_size_with_batch % input_size == 0);
- assert(num_units == bias_tensor->getShape().dim(0));
-
- // Make output tensor info
- ir::Shape output_shape(2);
- output_shape.dim(0) = batch_size;
- output_shape.dim(1) = num_units;
- const auto out_info =
- ir::OperandInfo::createStaticInfo(output_shape, in_tensor->tensorInfo().typeInfo());
- env->allocateIfNeeded(out_index, out_info);
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->getShape().rank() == 2);
- assert(out_tensor->getShape().dim(0) == batch_size);
- assert(out_tensor->getShape().dim(1) == num_units);
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *bias_tensor,
- const ITensor *ofm_tensor, const ir::operation::FullyConnected::Param ¶m)
-{
- const auto ifm_buffer = ifm_tensor->bufferRO();
- const auto ker_buffer = ker_tensor->bufferRO();
- const auto bias_buffer = bias_tensor->bufferRO();
- auto ofm_buffer = ofm_tensor->buffer();
-
- // Calculate
- nnfw::cker::FullyConnectedParams cker_param;
- cker_param.activation = convertActivationType(param.activation);
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_bias_shape = convertShape(bias_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_buffer);
- const float *ker_ptr = reinterpret_cast<const float *>(ker_buffer);
- const float *bias_ptr = reinterpret_cast<const float *>(bias_buffer);
- float *ofm_ptr = reinterpret_cast<float *>(ofm_buffer);
-
- nnfw::cker::FullyConnected(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_bias_shape, bias_ptr, cker_ofm_shape, ofm_ptr);
-}
-
-void invokeFC(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &conv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::FullyConnected &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::FullyConnected::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
- const auto bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto bias_tensor = env->tensorAt(bias_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, bias_tensor, ofm_tensor, conv_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float only"};
- }
-}
-} // namespace fc
-
-OpKernel *getFullyConnected()
-{
- static OpKernel kernel = {fc::prepareFC, fc::invokeFC};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Gather.h"
-
-#include <cker/operation/Gather.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareGather(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
- const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto indices_tensor = env->tensorAt(indices_index);
-
- // TODO handle unspecified output shape:
- // calculate output shape using ifm shape, kernel shape, padding, stride
- const auto output_info = env->graph().operands().at(output_index).info();
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(Gather): NYI for unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- if (indices_tensor->data_type() != ir::DataType::INT32)
- {
- throw std::runtime_error{"Interp(Gather): Invalid indices data type"};
- }
-
- auto output_tensor = env->tensorAt(output_index);
- auto output_rank = input_tensor->getShape().rank() + indices_tensor->getShape().rank() - 1;
-
- if (output_rank != output_tensor->getShape().rank())
- {
- throw std::runtime_error{"Interp(Gather): Invalid output rank"};
- }
- if (output_tensor->data_type() != input_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Gather): Invalid output data type"};
- }
-
- if (input_tensor->data_type() == ir::DataType::QUANT_UINT8_ASYMM &&
- input_tensor->tensorInfo().typeInfo() != output_tensor->tensorInfo().typeInfo())
- {
- throw std::runtime_error{
- "Interp(Gather): Cannot handle different I/O QUANT_UINT8_ASYMM scale/offset"};
- }
-}
-
-template <typename raw_type>
-void invoke(const ITensor *input_tensors, const ITensor *indices_tensors,
- const ITensor *output_tensor, uint32_t axis)
-{
- // Calculate
- nnfw::cker::GatherParams cker_param;
- cker_param.axis = (int8_t)axis;
-
- const auto cker_input_shapes = convertShape(input_tensors->tensorInfo().shape());
- const auto cker_indices_shape = convertShape(indices_tensors->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const raw_type *input_ptr = reinterpret_cast<const raw_type *>(input_tensors->bufferRO());
- const int32_t *indices_ptr = reinterpret_cast<const int32_t *>(indices_tensors->bufferRO());
- raw_type *output_ptr = reinterpret_cast<raw_type *>(output_tensor->buffer());
-
- nnfw::cker::Gather<raw_type>(cker_param, cker_input_shapes, input_ptr, cker_indices_shape,
- indices_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeGather(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &gather_node = nnfw::misc::polymorphic_downcast<const ir::operation::Gather &>(node);
- const int32_t axis_raw = gather_node.param().axis;
-
- const auto input_index = node.getInputs().at(ir::operation::Gather::INPUT);
- const auto indices_index = node.getInputs().at(ir::operation::Gather::INDICES);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto indices_tensor = env->tensorAt(indices_index);
- const auto output_tensor = env->tensorAt(output_index);
- const uint32_t axis = (axis_raw < 0) ? (axis_raw + input_tensor->getShape().rank()) : axis_raw;
-
- const auto data_type = input_tensor->data_type();
-
- switch (data_type)
- {
- case ir::DataType::FLOAT32:
- invoke<float>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- case ir::DataType::INT32:
- invoke<int32_t>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- case ir::DataType::QUANT_UINT8_ASYMM:
- invoke<uint8_t>(input_tensor, indices_tensor, output_tensor, axis);
- break;
- default:
- throw std::runtime_error{"Interp(Gather): NYI - Not supported type"};
- }
-}
-
-} // namespace
-
-OpKernel *getGather()
-{
- static OpKernel kernel = {prepareGather, invokeGather};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/InstanceNorm.h"
-
-#include <cker/operation/InstanceNorm.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace instancenorm
-{
-
-void prepareInstanceNorm(ExecEnv *env, const ir::Operation &node)
-{
- const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
- const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
- const auto output_index = node.getOutputs().at(0);
- const auto input_tensor = env->tensorAt(input_index);
-
- if (input_tensor->getShape().rank() != 4)
- {
- throw std::runtime_error{"Interp(InstanceNorm): Input should be 4D-tensor"};
- }
-
- // Output shape should be same with input
- env->allocateIfNeeded(output_index, input_tensor->tensorInfo());
-
- auto output_tensor = env->tensorAt(output_index);
- UNUSED_RELEASE(output_tensor);
-
- // Handle same ifm & ofm data type only
- assert(input_tensor->data_type() == output_tensor->data_type());
- assert(input_tensor->tensorInfo().shape() == output_tensor->tensorInfo().shape());
-}
-
-inline void setActivationParams(float min, float max, nnfw::cker::InstanceNormParams *params)
-{
- params->float_activation_min = min;
- params->float_activation_max = max;
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *gamma_tensor, const ITensor *beta_tensor,
- const ITensor *output_tensor, const ir::operation::InstanceNorm::Param ¶m)
-{
- // Calculate
- float activation_min, activation_max;
- calculateActivationRange(param.activation, &activation_min, &activation_max);
-
- nnfw::cker::InstanceNormParams cker_param;
- cker_param.epsilon = param.epsilon;
- cker_param.float_activation_min = activation_min;
- cker_param.float_activation_max = activation_max;
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_gamma_shape = convertShape(gamma_tensor->tensorInfo().shape());
- const auto cker_beta_shape = convertShape(beta_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_tensor->bufferRO());
- const float *gamma_ptr = reinterpret_cast<const float *>(gamma_tensor->bufferRO());
- const float *beta_ptr = reinterpret_cast<const float *>(beta_tensor->bufferRO());
- float *output_ptr = reinterpret_cast<float *>(output_tensor->buffer());
-
- nnfw::cker::InstanceNorm(cker_param, cker_input_shape, input_ptr, cker_gamma_shape, gamma_ptr,
- cker_beta_shape, beta_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeInstanceNorm(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &instancenorm_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::InstanceNorm &>(node);
-
- const auto input_index = node.getInputs().at(instancenorm_node.INPUT);
- const auto gamma_index = node.getInputs().at(instancenorm_node.GAMMA);
- const auto beta_index = node.getInputs().at(instancenorm_node.BETA);
- const auto out_index = node.getOutputs().at(0);
- const auto input_tensor = env->tensorAt(input_index);
- const auto gamma_tensor = env->tensorAt(gamma_index);
- const auto beta_tensor = env->tensorAt(beta_index);
- const auto out_tensor = env->tensorAt(out_index);
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, gamma_tensor, beta_tensor, out_tensor, instancenorm_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Unsupported data type"};
- }
-}
-} // namespace instancenorm
-
-OpKernel *getInstanceNorm()
-{
- static OpKernel kernel = {instancenorm::prepareInstanceNorm, instancenorm::invokeInstanceNorm};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
-#define __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
-
-#include "ir/Shape.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
-
-#include <cker/Shape.h>
-#include <cker/Types.h>
-
-namespace onert
-{
-namespace interp
-{
-
-inline nnfw::cker::Shape convertShape(const ir::Shape &shape)
-{
- auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
- std::vector<int32_t> raw_shape;
- raw_shape.resize(dimensions.size());
-
- for (uint32_t i = 0; i < dimensions.size(); ++i)
- {
- raw_shape[i] = dimensions[i];
- }
-
- return nnfw::cker::GetShape(raw_shape);
-}
-
-inline nnfw::cker::Shape convertExtendShape(const ir::Shape &shape)
-{
- auto dimensions = std::vector<uint32_t>(shape.dims().begin(), shape.dims().end());
-
- const int32_t extended_rank = 4;
- int32_t raw_shape[extended_rank];
- uint32_t start = extended_rank - dimensions.size();
-
- for (uint32_t i = 0; i < extended_rank; ++i)
- {
- if (i < start)
- {
- raw_shape[i] = 1;
- }
- else
- {
- raw_shape[i] = dimensions[i - start];
- }
- }
-
- return nnfw::cker::Shape(extended_rank, raw_shape);
-}
-
-inline nnfw::cker::FusedActivationFunctionType
-convertActivationType(const ir::Activation activation)
-{
- switch (activation)
- {
- case ir::Activation::NONE:
- return nnfw::cker::FusedActivationFunctionType::kNone;
- case ir::Activation::RELU:
- return nnfw::cker::FusedActivationFunctionType::kRelu;
- case ir::Activation::RELU1:
- return nnfw::cker::FusedActivationFunctionType::kRelu1;
- case ir::Activation::RELU6:
- return nnfw::cker::FusedActivationFunctionType::kRelu6;
- default:
- throw std::runtime_error{"CPU backend: Cannot convert activation type"};
- }
-}
-
-template <typename T>
-void calculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
- if (activation == ir::Activation::RELU)
- {
- *activation_min = 0;
- *activation_max = std::numeric_limits<T>::max();
- }
- else if (activation == ir::Activation::RELU6)
- {
- *activation_min = 0;
- *activation_max = 6;
- }
- else if (activation == ir::Activation::RELU1)
- {
- *activation_min = -1;
- *activation_max = 1;
- }
- else if (activation == ir::Activation::NONE)
- {
- *activation_min = std::numeric_limits<T>::lowest();
- *activation_max = std::numeric_limits<T>::max();
- }
- else
- {
- throw std::runtime_error{"Unsupported activation type"};
- }
-}
-
-inline ir::Shape calcBroadcastShape(const ir::Shape &lhs, const ir::Shape &rhs, bool &success)
-{
- int lhs_rank = lhs.rank();
- int rhs_rank = rhs.rank();
-
- int out_rank = (lhs_rank > rhs_rank ? lhs_rank : rhs_rank);
- ir::Shape out_shape(out_rank);
-
- int lhs_idim = lhs_rank - 1;
- int rhs_idim = rhs_rank - 1;
- success = true;
- for (int out_idim = out_rank - 1; out_idim >= 0; out_idim--)
- {
- if (lhs_idim == -1 && rhs_idim == -1)
- {
- // invalid result
- success = false;
- break;
- }
-
- if (lhs_idim == -1)
- {
- out_shape.dim(out_idim) = rhs.dim(rhs_idim);
- rhs_idim--;
- }
- else if (rhs_idim == -1)
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- }
- else
- {
- if (lhs.dim(lhs_idim) == rhs.dim(rhs_idim))
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else if (lhs.dim(lhs_idim) == 1)
- {
- out_shape.dim(out_idim) = rhs.dim(rhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else if (rhs.dim(rhs_idim) == 1)
- {
- out_shape.dim(out_idim) = lhs.dim(lhs_idim);
- lhs_idim--;
- rhs_idim--;
- }
- else
- {
- // invalid result
- success = false;
- break;
- }
- }
- }
-
- if (lhs_idim != -1 || rhs_idim != -1)
- {
- // invalid result
- success = false;
- }
- return out_shape;
-}
-
-inline nnfw::cker::PaddingType convertPaddingType(ir::PaddingType ir_padding_type)
-{
- switch (ir_padding_type)
- {
- case ir::PaddingType::EXPLICIT:
- return nnfw::cker::PaddingType::kNone;
- case ir::PaddingType::SAME:
- return nnfw::cker::PaddingType::kSame;
- case ir::PaddingType::VALID:
- return nnfw::cker::PaddingType::kValid;
- default:
- throw std::runtime_error("Wrong padding type.");
- break;
- }
-}
-
-} // namespace interp
-} // namespace onert
-
-#endif // __ONERT_INTERP_OPERATIONS_OPERATION_UTILS_H_
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Pad.h"
-
-#include <cker/operation/Pad.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void preparePad(ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
-
- const auto output_info = env->graph().operands().at(output_index).info();
-
- // Check shape and type lhs is same with rhs
- // TODO Util function to compare TensorInfo
- if (output_info.total_size() == 0)
- {
- throw std::runtime_error{"Interp(Pad): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(output_index, output_info);
- }
-
- const auto output_tensor = env->tensorAt(output_index);
- if (input_tensor->data_type() != output_tensor->data_type())
- {
- throw std::runtime_error{"Interp(Pad): Invalid output type"};
- }
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *pad_tensor, const ITensor *output_tensor)
-{
- const auto input_buffer = input_tensor->bufferRO();
- const auto pad_buffer = pad_tensor->bufferRO();
- auto output_buffer = output_tensor->buffer();
-
- int32_t pad_rank = pad_tensor->getShape().dim(0);
-
- const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
- const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
- const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
- const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer);
- float *output_ptr = reinterpret_cast<float *>(output_buffer);
-
- nnfw::cker::Pad<float>(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape,
- output_ptr, nullptr);
-}
-
-void invokePad(const ExecEnv *env, const ir::Operation &node)
-{
- const auto input_index = node.getInputs().at(ir::operation::Pad::INPUT);
- const auto pad_index = node.getInputs().at(ir::operation::Pad::PAD);
- const auto output_index = node.getOutputs().at(0);
-
- const auto input_tensor = env->tensorAt(input_index);
- const auto pad_tensor = env->tensorAt(pad_index);
- const auto output_tensor = env->tensorAt(output_index);
-
- const auto data_type = input_tensor->data_type();
-
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(input_tensor, pad_tensor, output_tensor);
- }
- else
- {
- throw std::runtime_error{"Interp(Pad): NYI - Unsupported data type"};
- }
-}
-} // namespace
-
-OpKernel *getPad()
-{
- static OpKernel kernel = {preparePad, invokePad};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Pool2D.h"
-#include "util/ShapeInference.h"
-#include "util/Utils.h"
-
-#include <cker/operation/AveragePool.h>
-#include <cker/operation/MaxPool.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace pool2d
-{
-
-void preparePool2D(ExecEnv *env, const ir::Operation &node)
-{
- const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
- const auto in_index = node.getInputs().at(pool_node.INPUT);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- UNUSED_RELEASE(in_tensor);
-
- assert(in_tensor->getShape().rank() == 4);
-
- const auto output_info = env->graph().operands().at(out_index).info();
- if (output_info.total_size() == 0)
- {
- // Handle unspecified output shape
- const auto infered_output_shape =
- shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
- env->allocateIfNeeded(
- out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
- }
- else
- {
- env->allocateIfNeeded(out_index, output_info);
- }
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Handle same ifm & ofm data type only
- assert(in_tensor->data_type() == out_tensor->data_type());
- assert(out_tensor->getShape().rank() == 4);
-}
-
-template <typename T>
-void invoke(const nnfw::cker::PoolParams &params, const nnfw::cker::Shape &in_shape,
- const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr,
- ir::operation::Pool2D::PoolType op_type)
-{
- switch (op_type)
- {
- case ir::operation::Pool2D::PoolType::AVG:
- nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
- break;
- case ir::operation::Pool2D::PoolType::MAX:
- nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
- break;
- default:
- throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"};
- break;
- }
-}
-
-void invokePool2DOps(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Check lhs shape is same with rhs (with broadcast)
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- // TODO support NCHW frontend
- const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto param = pool_node.param();
- const auto padding =
- ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
- // Calculate
- nnfw::cker::PoolParams cker_param;
- cker_param.filter_width = param.kw;
- cker_param.filter_height = param.kh;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
-
- const auto data_type = in_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- calculateActivationRange(param.activation, &cker_param.float_activation_min,
- &cker_param.float_activation_max);
-
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
- // Now, invoke() supports only Pool2D in float
- invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type);
- }
- else
- {
- throw std::runtime_error{"NYI: Support float only"};
- }
-}
-} // namespace pool2d
-
-OpKernel *getPool2D()
-{
- static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "../Registration.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepare(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- // Unspecified shape is not supported in operation node spec now
- const auto output_info = env->graph().operands().at(out_index).info();
- env->allocateAndShareIfNeeded(out_index, output_info, in_index);
-
- assert(output_info.total_size() == env->graph().operands().at(in_index).info().total_size());
-}
-
-void invoke(const ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- if (env->tensorAt(in_index)->bufferRO() == env->tensorAt(out_index)->bufferRO())
- {
- // Same data
- return;
- }
-
- const auto output_info = env->graph().operands().at(out_index).info();
- memcpy(env->tensorAt(out_index)->buffer(), env->tensorAt(in_index)->bufferRO(),
- output_info.total_size());
-}
-
-} // namespace
-
-OpKernel *getReshape()
-{
- static OpKernel kernel = {prepare, invoke};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/Softmax.h"
-
-#include <cker/operation/SoftMax.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
-{
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- UNUSED_RELEASE(in_tensor);
-
- assert((in_tensor->getShape().rank() == 4) || (in_tensor->getShape().rank() == 2));
-
- // Output shape should be same with input
- // Output type is pre-defined in model
- const auto output_shape = env->graph().operands().at(in_index).info().shape();
- const auto output_type = env->graph().operands().at(out_index).info().typeInfo();
-
- const auto output_info = ir::OperandInfo::createStaticInfo(output_shape, output_type);
- env->allocateIfNeeded(out_index, output_info);
-
- auto out_tensor = env->tensorAt(out_index);
- UNUSED_RELEASE(out_tensor);
-
- // Check output shape is same with input
- assert(out_tensor->getShape().rank() == out_tensor->getShape().rank());
- for (int32_t i = 0; i < in_tensor->getShape().rank(); i++)
- {
- assert(in_tensor->getShape().dim(i) == out_tensor->getShape().dim(i));
- }
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
- const ir::operation::Softmax::Param &param)
-{
- const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
- float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
- float beta = param.beta;
-
- if (in_tensor->getShape().rank() == 2)
- {
- uint32_t batch_size = in_tensor->getShape().dim(0);
- uint32_t input_size = in_tensor->getShape().dim(1);
-
- nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr);
- }
- else if (in_tensor->getShape().rank() == 4)
- {
- const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
- const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-
- nnfw::cker::SoftmaxParams cker_param;
- cker_param.beta = beta;
-
- nnfw::cker::Softmax(cker_param, in_shape, in_ptr, out_shape, out_ptr);
- }
- else
- {
- throw std::runtime_error{"Unsuported input dimension: support 2D or 4D"};
- }
-}
-
-void invokeSoftMax(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &softmax_node = nnfw::misc::polymorphic_downcast<const ir::operation::Softmax &>(node);
-
- const auto in_index = node.getInputs().at(0);
- const auto out_index = node.getOutputs().at(0);
-
- const auto in_tensor = env->tensorAt(in_index);
- const auto out_tensor = env->tensorAt(out_index);
-
- const auto in_data_type = in_tensor->data_type();
- const auto out_data_type = out_tensor->data_type();
- if ((in_data_type == ir::DataType::FLOAT32) && (out_data_type == ir::DataType::FLOAT32))
- {
- invoke(in_tensor, out_tensor, softmax_node.param());
- }
- else
- {
- throw std::runtime_error{"NYI: Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getSoftmax()
-{
- static OpKernel kernel = {prepareSoftMax, invokeSoftMax};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "OperationUtil.h"
-#include "../Registration.h"
-
-#include "ir/operation/TransposeConv.h"
-
-#include <cker/operation/TransposeConv.h>
-#include <misc/polymorphic_downcast.h>
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareTransposeConv(ExecEnv *env, const ir::Operation &node)
-{
- const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto ofm_shape_index = node.getInputs().at(ir::operation::TransposeConv::OUTPUT_SHAPE);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto ofm_shape_tensor = env->tensorAt(ofm_shape_index);
-
- assert(ifm_tensor->getShape().rank() == 4);
- assert(ker_tensor->getShape().rank() == 4);
- assert(ofm_shape_tensor->getShape().rank() == 1);
-
- UNUSED_RELEASE(ifm_tensor);
- UNUSED_RELEASE(ker_tensor);
- UNUSED_RELEASE(ofm_shape_tensor);
-
- const auto output_info = env->graph().operands().at(ofm_index).info();
- if (output_info.total_size() == 0)
- {
- // TODO: Handle unspecified output shape
- throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
- }
- else
- {
- env->allocateIfNeeded(ofm_index, output_info);
- }
-
- auto ofm_tensor = env->tensorAt(ofm_index);
- UNUSED_RELEASE(ofm_tensor);
-
- // Handle same ifm & ofm data type only
- if (ifm_tensor->data_type() != ofm_tensor->data_type())
- {
- throw std::runtime_error{"Interp(TConv): Different I/O data dype"};
- }
-
- if (ofm_tensor->getShape().rank() != 4)
- {
- throw std::runtime_error{"Interp(TConv): Invalid output rank"};
- }
-}
-
-void invoke(const ITensor *ifm_tensor, const ITensor *ker_tensor, const ITensor *ofm_tensor,
- const ir::operation::TransposeConv::Param &param)
-{
- const auto ifm_shape = ifm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- const auto ofm_shape = ofm_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto ker_shape = ker_tensor->tensorInfo().shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
- const auto padding =
- ir::calculatePadding(param.padding, ofm_shape, ifm_shape, param.stride, ker_width, ker_height);
-
- nnfw::cker::TransposeConvParams cker_param;
- cker_param.padding_values.width = padding.left;
- cker_param.padding_values.height = padding.top;
- cker_param.stride_width = param.stride.horizontal;
- cker_param.stride_height = param.stride.vertical;
- cker_param.dilation_width_factor = 1;
- cker_param.dilation_height_factor = 1;
-
- const auto cker_ifm_shape = convertShape(ifm_tensor->tensorInfo().shape());
- const auto cker_ker_shape = convertShape(ker_tensor->tensorInfo().shape());
- const auto cker_ofm_shape = convertShape(ofm_tensor->tensorInfo().shape());
- const float *ifm_ptr = reinterpret_cast<const float *>(ifm_tensor->bufferRO());
- const float *ker_ptr = reinterpret_cast<const float *>(ker_tensor->bufferRO());
- float *ofm_ptr = reinterpret_cast<float *>(ofm_tensor->buffer());
-
- nnfw::cker::TransposeConv(cker_param, cker_ifm_shape, ifm_ptr, cker_ker_shape, ker_ptr,
- cker_ofm_shape, ofm_ptr);
-}
-
-void invokeTransposeConv(const ExecEnv *env, const ir::Operation &node)
-{
- const auto &tconv_node =
- nnfw::misc::polymorphic_downcast<const ir::operation::TransposeConv &>(node);
-
- const auto ifm_index = node.getInputs().at(ir::operation::TransposeConv::INPUT);
- const auto ker_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto ofm_index = node.getOutputs().at(0);
-
- const auto ifm_tensor = env->tensorAt(ifm_index);
- const auto ker_tensor = env->tensorAt(ker_index);
- const auto ofm_tensor = env->tensorAt(ofm_index);
-
- const auto data_type = ifm_tensor->data_type();
- if (data_type == ir::DataType::FLOAT32)
- {
- invoke(ifm_tensor, ker_tensor, ofm_tensor, tconv_node.param());
- }
- else
- {
- throw std::runtime_error{"Interp(TConv): Support float32 only"};
- }
-}
-
-} // namespace
-
-OpKernel *getTransposeConv()
-{
- static OpKernel kernel = {prepareTransposeConv, invokeTransposeConv};
- return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
namespace ir
{
-int32_t const Shape::UNSPECIFIED_DIM = -1;
+int32_t const Shape::kUnspecifiedDim = -1;
// NNFW_MAX_RANK is 6
-int32_t const Shape::MAX_RANK = 6;
+int32_t const Shape::kMaxRank = 6;
FeatureShape Shape::asFeature(Layout layout) const
{
{
// if dimension is 0, it means unspecified and cannot calculate the total number of elements
if (std::any_of(_dimensions.begin(), _dimensions.end(),
- [](const int32_t &v) { return v == UNSPECIFIED_DIM; }))
+ [](const int32_t &v) { return v == kUnspecifiedDim; }))
throw std::runtime_error("num_elements() cannot calculate when any dimension is unspecified");
return std::accumulate(_dimensions.cbegin(), _dimensions.cend(), UINT64_C(1),
Shape permuteShape(const Shape &shape, Layout from, Layout to)
{
- assert(shape.rank() <= Shape::MAX_RANK);
+ assert(shape.rank() <= Shape::kMaxRank);
Shape ret{shape};
if (from == to)
return ret;
onert::ir::Shape shape(2);
shape.dim(0) = 1;
- shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
+ shape.dim(1) = onert::ir::Shape::kUnspecifiedDim;
ASSERT_EQ(shape.rank(), 2);
ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
_os << "{\n";
_os << " " << quote("traceEvents") << ": [\n";
- for (auto &recorder : recorders)
+ for (const auto &recorder : recorders)
{
flushOneRecord(*recorder);
}
void ChromeTracingWriter::flushOneRecord(const EventRecorder &recorder)
{
- for (auto &evt : recorder.duration_events())
+ for (const auto &evt : recorder.duration_events())
{
const std::string name = getLabel(*evt);
const std::string tid = getTid(*evt);
_os << " " << object(*evt, name, tid) << ",\n";
}
- for (auto &evt : recorder.counter_events())
+ for (const auto &evt : recorder.counter_events())
{
_os << " " << object(evt) << ",\n";
}
void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
{
os << "| ";
- for (auto &key : list)
+ for (const auto &key : list)
{
os << key << " | ";
}
MDTableBuilder &build()
{
- for (auto &it : divideGraph())
+ for (const auto &it : divideGraph())
{
size_t begin_idx = it.first;
size_t end_idx = it.second;
graph.end_ts = std::stoull(_duration_events[end_idx]->ts);
graph.setOperations(name_to_op);
- for (auto &arg : _duration_events[end_idx]->args)
+ for (const auto &arg : _duration_events[end_idx]->args)
{
if (arg.first == "session")
graph.session_index = arg.second;
void MDTableWriter::flush(const std::vector<std::unique_ptr<EventRecorder>> &records)
{
- for (auto &recorder : records)
+ for (const auto &recorder : records)
{
MDTableBuilder(recorder->duration_events(), recorder->counter_events()).build().write(_os);
}
// Memory
{
std::unordered_map<std::string, Stat> mem_stats;
- for (auto &recorder : recorders)
+ for (const auto &recorder : recorders)
{
- for (auto &evt : recorder->counter_events())
+ for (const auto &evt : recorder->counter_events())
{
auto &mem_stat = mem_stats[evt.name];
uint64_t val = std::stoull(evt.values.at("value"));
}
auto &mem = exec_data["memory"] = Json::Value{Json::objectValue};
- for (auto &kv : mem_stats)
+ for (const auto &kv : mem_stats)
{
auto &key = kv.first;
auto &val = kv.second;
// 2D keys : stats[tid][name]
std::unordered_map<std::string, std::unordered_map<std::string, Stat>> stats;
std::unordered_map<std::string, std::unordered_map<std::string, uint64_t>> begin_timestamps;
- for (auto &recorder : recorders)
+ for (const auto &recorder : recorders)
{
- for (auto &evt : recorder->duration_events())
+ for (const auto &evt : recorder->duration_events())
{
std::string evt_name = getLabel(*evt);
std::string evt_tid = getBackend(*evt);
}
}
- for (auto &kv : begin_timestamps)
- for (auto &kv2 : kv.second)
+ for (const auto &kv : begin_timestamps)
+ for (const auto &kv2 : kv.second)
if (kv2.second != 0)
throw std::runtime_error{"Invalid Data - B and E pair does not match."};
- for (auto &kv : stats)
+ for (const auto &kv : stats)
{
- auto &tid = kv.first;
- auto &map = kv.second;
+ const auto &tid = kv.first;
+ const auto &map = kv.second;
auto &json_tid = exec_data[tid] = Json::Value{Json::objectValue};
- for (auto &kv : map)
+ for (const auto &kv : map)
{
auto &name = kv.first;
auto &val = kv.second;
const size_t total_num_elements)
{
ir::Shape ret(shape_num_elements);
- int32_t flatten_dim = ir::Shape::UNSPECIFIED_DIM;
+ int32_t flatten_dim = ir::Shape::kUnspecifiedDim;
for (int32_t i = 0; i < shape_num_elements; ++i)
{
if (shape_buf[i] < 0)
{
- if (flatten_dim != ir::Shape::UNSPECIFIED_DIM)
+ if (flatten_dim != ir::Shape::kUnspecifiedDim)
throw std::runtime_error("Reshape: 2nd param has special dim(for flatten) more than twice");
flatten_dim = i;
ret.dim(i) = 1;
ret.dim(i) = shape_buf[i];
}
}
- if (flatten_dim != ir::Shape::UNSPECIFIED_DIM)
+ if (flatten_dim != ir::Shape::kUnspecifiedDim)
ret.dim(flatten_dim) = total_num_elements / ret.num_elements();
// Check reshapable
* @param model reference to model
*/
explicit BaseLoader(std::unique_ptr<ir::Model> &model)
- : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr},
- _tensor_names(std::make_shared<std::unordered_map<ir::OperandIndex, std::string>>())
+ : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr}
{
_use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA);
}
const Model *_domain_model;
// Maps Tensor indices to onert Operands.
std::vector<ir::OperandIndex> _tensor_to_operand;
- std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
+ std::unordered_map<ir::OperandIndex, std::string> _tensor_names;
// Verifier
std::unique_ptr<Verifier> _verifier;
// Boolean flag to use MMAPED_DATA
subg.setOperandValue(operand_index, std::move(data_obj));
}
- _tensor_names->emplace(operand_index, tensor->name()->str());
+ _tensor_names.emplace(operand_index, tensor->name()->str());
// Variable
if (tensor->is_variable())
verifySubgraphIndex(else_index);
ir::operation::If::Param param;
- param.then_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(then_index)};
- param.else_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(else_index)};
+ param.then_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(then_index)};
+ param.else_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(else_index)};
loadOperationTo<ir::operation::If>(op, subg, param);
}
verifySubgraphIndex(body_index);
ir::operation::While::Param param;
- param.cond_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(cond_index)};
- param.body_subg_index = ir::SubgraphIndex{static_cast<uint32_t>(body_index)};
+ param.cond_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(cond_index)};
+ param.body_subg_index = ir::SubgraphIndex{static_cast<uint16_t>(body_index)};
loadOperationTo<ir::operation::While>(op, subg, param);
}
case BuiltinOperator::BuiltinOperator_DEPTH_TO_SPACE:
loadDepthToSpace(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_EMBEDDING_LOOKUP:
+ loadOperationTo<ir::operation::EmbeddingLookup>(op, subg);
+ return;
+ case BuiltinOperator::BuiltinOperator_HASHTABLE_LOOKUP:
+ loadOperationTo<ir::operation::HashtableLookup>(op, subg);
+ return;
default:
throw std::runtime_error(
std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
// Load subgraphs and map operations on subgraph
const auto subgraphs = _domain_model->subgraphs();
auto model = std::make_unique<ir::Model>();
- for (uint32_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index)
+ if (subgraphs->size() - 1 > ir::SubgraphIndex::max())
+ throw std::runtime_error{"The number of subgraphs cannot exceed " +
+ std::to_string(ir::SubgraphIndex::max() + 1)};
+ for (uint16_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index)
{
auto subg = loadSubgraph((*_domain_model->subgraphs())[subgraph_index]);
- model->push(ir::SubgraphIndex{subgraph_index}, std::move(subg));
+ // NOTE: () is used instead of {} because () does not perform the narrowing check.
+ // This is okay since overflow is already checked by the if-statement above.
+ model->push(ir::SubgraphIndex(subgraph_index), std::move(subg));
}
_model = std::move(model);
}
for (const std::int32_t input_ind : *circle_subg->inputs())
{
subg->addInput(tensorIdxToOperandIdx(input_ind),
- _tensor_names->at(_tensor_to_operand[input_ind]));
+ _tensor_names.at(_tensor_to_operand[input_ind]));
}
// Set outputs
for (const std::int32_t output_ind : *circle_subg->outputs())
{
subg->addOutput(tensorIdxToOperandIdx(output_ind),
- _tensor_names->at(_tensor_to_operand[output_ind]));
+ _tensor_names.at(_tensor_to_operand[output_ind]));
}
// Create operations
for (const auto *op : *circle_subg->operators())
target_link_libraries(test_onert_frontend_nnapi PRIVATE gtest)
target_link_libraries(test_onert_frontend_nnapi PRIVATE gtest_main)
-install(TARGETS test_onert_frontend_nnapi DESTINATION unittest_standalone)
+install(TARGETS test_onert_frontend_nnapi DESTINATION unittest)
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- if (compilation->state() != ::onert::compiler::State::CREATED)
+ if (compilation->isFinished())
{
VERBOSE(NNAPI::Compilation) << "finish: Already finished" << std::endl;
return ANEURALNETWORKS_BAD_STATE;
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- if (compilation->state() != ::onert::compiler::State::CREATED)
+ if (compilation->isFinished())
{
VERBOSE(NNAPI::Compilation) << "setPreference: Already finished" << std::endl;
return ANEURALNETWORKS_BAD_STATE;
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- std::shared_ptr<onert::exec::Executors> executors;
+ std::shared_ptr<onert::exec::IExecutors> executors;
compilation->publish(executors);
_compiler{std::make_shared<compiler::Compiler>(_model, *_coptions)}
{
if (model->allowedToFp16())
- {
- _compiler->enableToFp16();
- }
+ _coptions->enableToFp16();
}
bool ANeuralNetworksCompilation::finish() noexcept
try
{
_artifact = _compiler->compile();
+ _compiler = nullptr;
}
catch (const std::exception &e)
{
#include "compiler/Compiler.h"
#include "ir/Graph.h"
#include "ir/Model.h"
-#include "exec/Executors.h"
+#include "exec/IExecutors.h"
#include "util/TracingCtx.h"
struct ANeuralNetworksCompilation
public:
bool finish() noexcept;
+ bool isFinished() noexcept { return _compiler == nullptr; }
- onert::compiler::State state(void) noexcept { return _compiler->state(); }
- void publish(std::shared_ptr<onert::exec::Executors> &executors) noexcept
+ void publish(std::shared_ptr<onert::exec::IExecutors> &executors) noexcept
{
executors = _artifact ? _artifact->_executors : nullptr;
}
struct ANeuralNetworksExecution
{
public:
- ANeuralNetworksExecution(const std::shared_ptr<onert::exec::Executors> &executors)
+ ANeuralNetworksExecution(const std::shared_ptr<onert::exec::IExecutors> &executors)
: _execution{std::make_shared<onert::exec::Execution>(executors)}
{
// DO NOTHING
for (const std::int32_t input_ind : *tflite_subg->inputs())
{
subg->addInput(tensorIdxToOperandIdx(input_ind),
- _tensor_names->at(_tensor_to_operand[input_ind]));
+ _tensor_names.at(_tensor_to_operand[input_ind]));
}
// Set outputs
for (const std::int32_t output_ind : *tflite_subg->outputs())
{
subg->addOutput(tensorIdxToOperandIdx(output_ind),
- _tensor_names->at(_tensor_to_operand[output_ind]));
+ _tensor_names.at(_tensor_to_operand[output_ind]));
}
// Create operations
for (const auto *op : *tflite_subg->operators())
loadOperation(op, *subg);
}
- subg->setTensorName(_tensor_names);
subg->verify();
return subg;
return()
endif(NOT BUILD_NPUD)
-nnfw_find_package(GLib2.0 REQUIRED)
+set(NPUD_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR})
-file(GLOB_RECURSE SOURCES "*.cc")
+nnfw_find_package(Gio2.0 REQUIRED)
+nnfw_find_package(Giounix2.0 REQUIRED)
-add_executable(npud ${SOURCES})
-set_target_properties(npud PROPERTIES LINKER_LANGUAGE CXX)
-target_include_directories(npud PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_include_directories(npud PUBLIC ${GLIB2.0_INCLUDE_DIRS})
-target_link_libraries(npud PRIVATE nnfw_lib_misc)
-target_link_libraries(npud PRIVATE ${GLIB2.0_LIBRARIES})
-target_link_libraries(npud PRIVATE ${LIB_PTHREAD})
+find_program(GDBUS_CODEGEN NAMES gdbus-codegen)
+if (NOT GDBUS_CODEGEN)
+ message(SEND_ERROR "Could not find gdbus-codegen")
+endif(NOT GDBUS_CODEGEN)
-if(ENVVAR_NPUD_CONFIG)
- target_compile_definitions(npud PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
-endif(ENVVAR_NPUD_CONFIG)
+set(DBUS_INCLUDE_DIRS "${CMAKE_CURRENT_BINARY_DIR}")
+set(DBUS_INTERFACE "org.tizen.npud")
+set(DBUS_NAMESPACE "Npud")
+set(DBUS_INTROSPECTION_XML "org.tizen.npud.xml")
+set(DBUS_CORE "dbus-core")
+set(DBUS_CORE_SOURCE "${DBUS_CORE}.c")
+set(DBUS_CONFIG_FILE "org.tizen.npud.conf")
-install(TARGETS npud DESTINATION bin)
+add_custom_command(OUTPUT ${DBUS_CORE_SOURCE}
+ COMMAND ${GDBUS_CODEGEN}
+ --generate-c-code ${DBUS_CORE}
+ --interface-prefix ${DBUS_INTERFACE}
+ --c-namespace ${DBUS_NAMESPACE}
+ ${CMAKE_CURRENT_SOURCE_DIR}/${DBUS_INTROSPECTION_XML}
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${DBUS_INTROSPECTION_XML})
+
+add_library(npud_dbus STATIC ${DBUS_CORE_SOURCE})
+
+target_include_directories(npud_dbus PUBLIC ${GIO2.0_INCLUDE_DIRS})
+target_include_directories(npud_dbus PUBLIC ${GIO_UNIX_2.0_INCLUDE_DIRS})
+target_link_libraries(npud_dbus PRIVATE ${GIO2.0_LIBRARIES})
+target_link_libraries(npud_dbus PRIVATE ${GIO_UNIX_2.0_LIBRARIES})
+
+install(FILES ${DBUS_CONFIG_FILE} DESTINATION share)
+
+add_subdirectory(core)
+add_subdirectory(tests)
+add_subdirectory(backend)
--- /dev/null
+# Backends
+# Each backend is added unconditionally here; the trix directory returns early on its own
+# when the TRIXEngine package is not found.
+add_subdirectory(trix)
--- /dev/null
+# TRIX backend of npud, built as the shared library npud_backend_trix.
+# The whole directory is skipped when the TRIXEngine package cannot be found.
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
+if(NOT TRIXEngine_FOUND)
+  return()
+endif()
+
+# Every .cc file found recursively under this directory is compiled into the library.
+file(GLOB_RECURSE SOURCES "*.cc")
+add_library(npud_backend_trix SHARED ${SOURCES})
+
+# NPUD_INCLUDE_DIRS is expected to be set by the including directory.
+target_include_directories(npud_backend_trix PUBLIC ${NPUD_INCLUDE_DIRS})
+target_link_libraries(npud_backend_trix PRIVATE nnfw_lib_misc trix_engine)
+
+# Optional compile-time switch forwarded to the backend sources.
+if(ENVVAR_NPUD_CONFIG)
+  target_compile_definitions(npud_backend_trix PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
+endif()
+
+install(TARGETS npud_backend_trix DESTINATION lib)
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TrixBackend.h"
+
+#include <algorithm>
+
+#if defined(__linux__)
+// C-linkage factory functions for TrixBackend, compiled only on Linux.
+// NOTE(review): presumably looked up by symbol name by whatever loads this shared library -
+// confirm against the loader.
+extern "C" {
+using namespace ::npud::backend::trix;
+
+// Returns a newly heap-allocated TrixBackend; release it with deallocate().
+TrixBackend *allocate() { return new TrixBackend(); }
+
+// Deletes a backend obtained from allocate(). Passing a null pointer is a no-op.
+void deallocate(TrixBackend *trix) { delete trix; }
+}
+#endif
+
+namespace npud
+{
+namespace backend
+{
+namespace trix
+{
+
+// Opens every NPU core of type NPUCOND_TRIV2_CONN_SOCIP that can be acquired and stores the
+// resulting handles in a freshly created TrixDevice (_dev).
+// When no core is reported, or none of them can be opened, _dev is left unset.
+TrixBackend::TrixBackend() : _devType(NPUCOND_TRIV2_CONN_SOCIP)
+{
+  const auto numCores = getnumNPUdeviceByType(_devType);
+
+  // A non-positive core count simply leaves this list empty.
+  std::vector<npudev_h> opened;
+  for (int core = 0; core < numCores; ++core)
+  {
+    npudev_h dev;
+    // A core that fails to open is skipped so that the remaining cores are still tried.
+    if (getNPUdeviceByType(&dev, _devType, core) >= 0)
+    {
+      opened.emplace_back(dev);
+    }
+  }
+
+  if (opened.empty())
+  {
+    return;
+  }
+
+  _dev = std::make_unique<TrixDevice>();
+  _dev->handles = std::move(opened);
+}
+
+// Releases everything the backend still holds on the NPU: first the outstanding requests of
+// every live context, then all registered models, and finally the device handles themselves.
+TrixBackend::~TrixBackend()
+{
+  // The constructor leaves _dev unset when no NPU core could be opened. There is nothing to
+  // release in that case, and dereferencing _dev below would crash.
+  if (_dev == nullptr)
+  {
+    return;
+  }
+
+  for (const auto &ctx : _dev->ctxs)
+  {
+    // Requests are removed through the handle of the core the context was created on.
+    npudev_h handle = _dev->handles.at(ctx->defaultCore);
+    for (const auto id : ctx->requests)
+    {
+      // Best effort: the return value is ignored because a destructor cannot report failure.
+      removeNPU_request(handle, id);
+    }
+  }
+
+  for (const auto &handle : _dev->handles)
+  {
+    unregisterNPUmodel_all(handle);
+    putNPUdevice(handle);
+  }
+}
+
+// Intended to report the backend version through `version`.
+// Not implemented yet: `version` is left untouched and NPU_STATUS_ERROR_NOT_SUPPORTED is
+// always returned.
+NpuStatus TrixBackend::getVersion(std::string &version)
+{
+ // TODO Implement details
+ return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+// Creates a context bound to the NPU core `deviceId` and returns it through `ctx`.
+//
+// The context is owned by the backend (it is stored in _dev->ctxs); the caller receives a
+// non-owning pointer and must hand it back to destroyContext() to release it.
+//
+// Returns NPU_STATUS_ERROR_INVALID_ARGUMENT when `ctx` is null or when `deviceId` does not name
+// an opened core (which includes the case where no core could be opened at all), and
+// NPU_STATUS_SUCCESS otherwise. `priority` is currently ignored.
+NpuStatus TrixBackend::createContext(int deviceId, int priority, NpuContext **ctx)
+{
+  // _dev is unset when the constructor found no usable core, so it is checked before use.
+  // The range check is done on signed values so that a negative id is rejected explicitly
+  // instead of relying on an implicit conversion to an unsigned type.
+  if (ctx == nullptr || _dev == nullptr || deviceId < 0 ||
+      deviceId >= static_cast<int>(_dev->handles.size()))
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  auto context = std::make_unique<NpuContext>();
+  context->defaultCore = deviceId;
+  // TODO Consider priority.
+  *ctx = context.get();
+  _dev->ctxs.emplace_back(std::move(context));
+  return NPU_STATUS_SUCCESS;
+}
+
+// Destroys a context previously returned by createContext().
+//
+// Every request recorded in the context is removed from the NPU, every model recorded in the
+// context has its reference count decremented (and is unregistered from the NPU once the count
+// reaches zero), and finally the context object itself is released.
+//
+// Returns
+//   NPU_STATUS_ERROR_INVALID_ARGUMENT  `ctx` is null or is not a context owned by this backend
+//   NPU_STATUS_ERROR_OPERATION_FAILED  removing a request or unregistering a model failed
+//   NPU_STATUS_SUCCESS                 otherwise
+NpuStatus TrixBackend::destroyContext(NpuContext *ctx)
+{
+  // _dev is unset when the constructor found no usable core; no context can exist then.
+  if (ctx == nullptr || _dev == nullptr)
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Only pointers that are actually owned by this backend are accepted. This also ensures
+  // `ctx` is not dereferenced below unless it is known to be alive.
+  const auto citer =
+    std::find_if(_dev->ctxs.begin(), _dev->ctxs.end(),
+                 [ctx](const std::unique_ptr<NpuContext> &c) { return c.get() == ctx; });
+  if (citer == _dev->ctxs.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  npudev_h handle = _dev->handles.at(ctx->defaultCore);
+
+  // NOTE(review): on an early failure return below, requests and models handled so far have
+  // already been released but are still listed in `ctx`; a retry would process them again.
+  for (auto rid : ctx->requests)
+  {
+    if (removeNPU_request(handle, rid) < 0)
+    {
+      return NPU_STATUS_ERROR_OPERATION_FAILED;
+    }
+    _dev->requests.erase(rid);
+  }
+
+  for (auto mid : ctx->models)
+  {
+    auto &minfo = _dev->models.at(mid);
+    // A model may be shared by several contexts; only the last user unregisters it.
+    if (--minfo->refCount == 0)
+    {
+      if (unregisterNPUmodel(handle, mid) < 0)
+      {
+        return NPU_STATUS_ERROR_OPERATION_FAILED;
+      }
+      _dev->models.erase(mid);
+    }
+  }
+
+  _dev->ctxs.erase(citer);
+  return NPU_STATUS_SUCCESS;
+}
+
+// Intended to create `buffer` for use with the context `ctx`.
+// Not implemented yet: both arguments are ignored and NPU_STATUS_ERROR_NOT_SUPPORTED is
+// always returned.
+NpuStatus TrixBackend::createBuffer(NpuContext *ctx, GenericBuffer *buffer)
+{
+ // TODO Implement details
+ return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+// Placeholder: neither argument is used and NPU_STATUS_ERROR_NOT_SUPPORTED is
+// always returned.
+NpuStatus TrixBackend::destroyBuffer(NpuContext *ctx, GenericBuffer *buffer)
+{
+ // TODO Implement details
+ return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+/**
+ * @brief Register a model file on the context's default core.
+ *
+ * A model with the same path that is already registered on the same core is
+ * shared: its reference count is incremented and its existing id is returned.
+ *
+ * @param ctx       Context that will reference the model.
+ * @param modelPath Path of the model file.
+ * @param modelId   Receives the id of the (new or shared) model.
+ * @return NPU_STATUS_SUCCESS, NPU_STATUS_ERROR_INVALID_ARGUMENT for null
+ *         pointers, or NPU_STATUS_ERROR_OPERATION_FAILED when the metadata
+ *         cannot be read or the device rejects the model.
+ */
+NpuStatus TrixBackend::registerModel(NpuContext *ctx, const std::string &modelPath,
+                                     ModelID *modelId)
+{
+  if (ctx == nullptr || modelId == nullptr)
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  ModelID id = 0;
+  auto iter =
+    std::find_if(_dev->models.begin(), _dev->models.end(),
+                 [&](const std::pair<const ModelID, std::unique_ptr<TrixModelInfo>> &p) {
+                   return p.second->core == ctx->defaultCore && p.second->path == modelPath;
+                 });
+  // Already registered model.
+  if (iter != _dev->models.end())
+  {
+    // Report the id of the shared model rather than the initial value 0.
+    id = iter->first;
+    iter->second->refCount++;
+    ctx->models.emplace_back(id);
+  }
+  else
+  {
+    auto meta = getNPUmodel_metadata(modelPath.c_str(), false);
+    if (meta == nullptr)
+    {
+      return NPU_STATUS_ERROR_OPERATION_FAILED;
+    }
+
+    // Hand the metadata to TrixModelInfo immediately: its destructor frees it,
+    // so the failure path below no longer leaks it.
+    auto info = std::make_unique<TrixModelInfo>(id, modelPath, ctx->defaultCore, meta, 1);
+
+    generic_buffer fileInfo{};
+    fileInfo.type = BUFFER_FILE;
+    fileInfo.filepath = modelPath.c_str();
+    fileInfo.size = meta->size;
+
+    npudev_h handle = _dev->handles.at(ctx->defaultCore);
+    if (registerNPUmodel(handle, &fileInfo, &id) < 0)
+    {
+      return NPU_STATUS_ERROR_OPERATION_FAILED;
+    }
+
+    info->id = id;
+    _dev->models.emplace(id, std::move(info));
+    ctx->models.emplace_back(id);
+  }
+
+  *modelId = id;
+  return NPU_STATUS_SUCCESS;
+}
+
+/**
+ * @brief Drop a context's reference to a model.
+ *
+ * Requests of this context that were created for the model are removed first.
+ * The model is unregistered from the device when this was its last reference.
+ *
+ * @param ctx     Context that registered the model.
+ * @param modelId Model to release.
+ * @return NPU_STATUS_SUCCESS, NPU_STATUS_ERROR_INVALID_ARGUMENT for a null
+ *         context, NPU_STATUS_ERROR_INVALID_MODEL when the context does not
+ *         reference the model, or NPU_STATUS_ERROR_OPERATION_FAILED when a
+ *         driver call fails.
+ */
+NpuStatus TrixBackend::unregisterModel(NpuContext *ctx, ModelID modelId)
+{
+  if (ctx == nullptr)
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  auto miter = std::find(ctx->models.begin(), ctx->models.end(), modelId);
+  if (miter == ctx->models.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_MODEL;
+  }
+
+  npudev_h handle = _dev->handles.at(ctx->defaultCore);
+
+  for (auto riter = ctx->requests.begin(); riter != ctx->requests.end();)
+  {
+    // Copy the id: the request info that also stores it is destroyed below.
+    const RequestID rid = *riter;
+    auto &rinfo = _dev->requests.at(rid);
+    if (rinfo->modelId == modelId)
+    {
+      if (removeNPU_request(handle, rid) < 0)
+      {
+        return NPU_STATUS_ERROR_OPERATION_FAILED;
+      }
+      _dev->requests.erase(rid);
+      riter = ctx->requests.erase(riter);
+    }
+    else
+    {
+      ++riter;
+    }
+  }
+
+  auto &minfo = _dev->models.at(modelId);
+  if (minfo->refCount <= 1)
+  {
+    // Last user: keep the bookkeeping untouched when the device refuses, so
+    // the caller can retry without the count going negative.
+    if (unregisterNPUmodel(handle, modelId) < 0)
+    {
+      return NPU_STATUS_ERROR_OPERATION_FAILED;
+    }
+    _dev->models.erase(modelId);
+  }
+  else
+  {
+    --minfo->refCount;
+  }
+
+  ctx->models.erase(miter);
+  return NPU_STATUS_SUCCESS;
+}
+
+/**
+ * @brief Create an inference request for a model referenced by the context.
+ *
+ * @param ctx       Context that references @p modelId.
+ * @param modelId   Model the request is created for.
+ * @param requestId Receives the id assigned by the device.
+ * @return NPU_STATUS_SUCCESS, NPU_STATUS_ERROR_INVALID_ARGUMENT for a null
+ *         context, NPU_STATUS_ERROR_INVALID_MODEL when the context does not
+ *         reference the model, or NPU_STATUS_ERROR_OPERATION_FAILED when the
+ *         device cannot create the request.
+ */
+NpuStatus TrixBackend::createRequest(NpuContext *ctx, ModelID modelId, RequestID *requestId)
+{
+  if (!ctx)
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  // The model must have been registered through this very context.
+  if (std::count(ctx->models.begin(), ctx->models.end(), modelId) == 0)
+  {
+    return NPU_STATUS_ERROR_INVALID_MODEL;
+  }
+
+  int rawId = 0;
+  npudev_h device = _dev->handles.at(ctx->defaultCore);
+  if (createNPU_request(device, modelId, &rawId) < 0)
+  {
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+
+  // Record the request both device-wide and in the owning context.
+  const auto rid = static_cast<RequestID>(rawId);
+  _dev->requests.emplace(rid, std::make_unique<TrixRequestInfo>(rid, modelId));
+  ctx->requests.push_back(rid);
+
+  *requestId = rid;
+  return NPU_STATUS_SUCCESS;
+}
+
+/**
+ * @brief Remove a request from the device and from the context.
+ *
+ * @param ctx       Context that owns the request.
+ * @param requestId Request to remove.
+ * @return NPU_STATUS_SUCCESS, NPU_STATUS_ERROR_INVALID_ARGUMENT when the
+ *         context is null or does not own the request, or
+ *         NPU_STATUS_ERROR_OPERATION_FAILED when the device refuses.
+ */
+NpuStatus TrixBackend::destroyRequest(NpuContext *ctx, RequestID requestId)
+{
+  if (!ctx)
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  const auto last = ctx->requests.end();
+  const auto pos = std::find(ctx->requests.begin(), last, requestId);
+  if (pos != last)
+  {
+    npudev_h device = _dev->handles.at(ctx->defaultCore);
+    if (removeNPU_request(device, requestId) >= 0)
+    {
+      // Forget the request only after the device released it.
+      _dev->requests.erase(requestId);
+      ctx->requests.erase(pos);
+      return NPU_STATUS_SUCCESS;
+    }
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+
+  return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+}
+
+/**
+ * @brief Attach input/output buffers to a request.
+ *
+ * The buffer counts must match the segment counts in the model metadata.
+ * Layout and data type of every tensor are taken from that metadata;
+ * @p inputInfos and @p outputInfos are not read by this implementation.
+ *
+ * @param ctx         Context that owns the request.
+ * @param requestId   Request to configure.
+ * @param inputBufs   Input buffers, one per input segment.
+ * @param inputInfos  Unused.
+ * @param outputBufs  Output buffers, one per output segment.
+ * @param outputInfos Unused.
+ * @return NPU_STATUS_SUCCESS, NPU_STATUS_ERROR_INVALID_ARGUMENT (unknown
+ *         context/request or null buffer lists), NPU_STATUS_ERROR_INVALID_MODEL,
+ *         NPU_STATUS_ERROR_INVALID_DATA (buffer count mismatch) or
+ *         NPU_STATUS_ERROR_OPERATION_FAILED (device rejected the data).
+ */
+NpuStatus TrixBackend::setRequestData(NpuContext *ctx, RequestID requestId, InputBuffers *inputBufs,
+                                      TensorDataInfos *inputInfos, OutputBuffers *outputBufs,
+                                      TensorDataInfos *outputInfos)
+{
+  // Both buffer lists are dereferenced below.
+  if (inputBufs == nullptr || outputBufs == nullptr)
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Also rejects a null ctx, since no stored context compares equal to it.
+  auto citer = std::find_if(_dev->ctxs.begin(), _dev->ctxs.end(),
+                            [&](std::unique_ptr<NpuContext> &c) { return c.get() == ctx; });
+  if (citer == _dev->ctxs.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  auto riter = std::find(ctx->requests.begin(), ctx->requests.end(), requestId);
+  if (riter == ctx->requests.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  auto &req = _dev->requests.at(requestId);
+  auto miter = std::find(ctx->models.begin(), ctx->models.end(), req->modelId);
+  if (miter == ctx->models.end())
+  {
+    return NPU_STATUS_ERROR_INVALID_MODEL;
+  }
+
+  // TODO Exception control of `at`
+  auto &minfo = _dev->models.at(req->modelId);
+  if (minfo->meta->input_seg_num != inputBufs->numBuffers ||
+      minfo->meta->output_seg_num != outputBufs->numBuffers)
+  {
+    return NPU_STATUS_ERROR_INVALID_DATA;
+  }
+
+  auto &inInfos = req->inInfos;
+  auto &outInfos = req->outInfos;
+
+  inInfos->num_info = inputBufs->numBuffers;
+  for (auto i = 0; i < inInfos->num_info; ++i)
+  {
+    inInfos->info[i].layout = DATA_LAYOUT_MODEL;
+    inInfos->info[i].type = minfo->meta->input_seg_quant_type[i];
+  }
+
+  outInfos->num_info = outputBufs->numBuffers;
+  for (auto i = 0; i < outInfos->num_info; ++i)
+  {
+    outInfos->info[i].layout = DATA_LAYOUT_MODEL;
+    outInfos->info[i].type = minfo->meta->output_seg_quant_type[i];
+  }
+
+  auto &inBufs = req->inBufs;
+  auto &outBufs = req->outBufs;
+
+  inBufs->num_buffers = inputBufs->numBuffers;
+  for (auto i = 0; i < inBufs->num_buffers; ++i)
+  {
+    if (inputBufs->buffers[i].type == NPU_BUFFER_MAPPED)
+    {
+      inBufs->bufs[i].addr = inputBufs->buffers[i].addr;
+    }
+    else if (inputBufs->buffers[i].type == NPU_BUFFER_DMABUF)
+    {
+      // TODO Implement details
+      // inBufs.bufs[i].dmabuf = inputBufs->buffers[i].dmabuf;
+      // inBufs.bufs[i].offset = inputBufs->buffers[i].offset;
+    }
+    else
+    {
+      // Any other buffer type leaves this slot as it was.
+      continue;
+    }
+    inBufs->bufs[i].size = inputBufs->buffers[i].size;
+    inBufs->bufs[i].type = static_cast<buffer_types>(inputBufs->buffers[i].type);
+  }
+
+  outBufs->num_buffers = outputBufs->numBuffers;
+  for (auto i = 0; i < outBufs->num_buffers; ++i)
+  {
+    if (outputBufs->buffers[i].type == NPU_BUFFER_MAPPED)
+    {
+      outBufs->bufs[i].addr = outputBufs->buffers[i].addr;
+    }
+    else if (outputBufs->buffers[i].type == NPU_BUFFER_DMABUF)
+    {
+      // TODO Implement details
+      // outBufs.bufs[i].dmabuf = outputBufs->buffers[i].dmabuf;
+      // outBufs.bufs[i].offset = outputBufs->buffers[i].offset;
+    }
+    else
+    {
+      // Any other buffer type leaves this slot as it was.
+      continue;
+    }
+    outBufs->bufs[i].size = outputBufs->buffers[i].size;
+    outBufs->bufs[i].type = static_cast<buffer_types>(outputBufs->buffers[i].type);
+  }
+
+  npudev_h handle = _dev->handles.at(ctx->defaultCore);
+  if (setNPU_requestData(handle, requestId, inBufs.get(), inInfos.get(), outBufs.get(),
+                         outInfos.get()) < 0)
+  {
+    return NPU_STATUS_ERROR_OPERATION_FAILED;
+  }
+
+  return NPU_STATUS_SUCCESS;
+}
+
+// Placeholder: neither argument is used and NPU_STATUS_ERROR_NOT_SUPPORTED is
+// always returned.
+NpuStatus TrixBackend::submitRequest(NpuContext *ctx, RequestID requestId)
+{
+ // TODO Implement details
+ return NPU_STATUS_ERROR_NOT_SUPPORTED;
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace npud
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_BACKEND_TRIX_BACKEND_H__
+#define __ONE_SERVICE_NPUD_BACKEND_TRIX_BACKEND_H__
+
+#include <core/Backend.h>
+#include <libnpuhost.h>
+#include <memory>
+#include <vector>
+#include <map>
+
+namespace npud
+{
+namespace backend
+{
+namespace trix
+{
+
+// Makes the core types (NpuStatus, NpuContext, ModelID, ...) usable unqualified
+// inside npud::backend::trix.
+using namespace ::npud::core;
+
+// Element type of TrixDevice::handles.
+using Handle = void *;
+
+/**
+ * @brief Trix model information.
+ *
+ * Owns @c meta: the destructor releases it with free(). Copying is disabled
+ * because a copy would release the same pointer a second time.
+ *
+ * @param id        The model identifier.
+ * @param path      The model path.
+ * @param core      The core number where the model is registered.
+ * @param meta      The meta data of model.
+ * @param refCount  The reference count of model users.
+ */
+struct TrixModelInfo
+{
+  ModelID id;
+  std::string path;
+  int core;
+  npubin_meta *meta;
+  int refCount;
+
+  // Initialise every scalar so a default-constructed object holds no garbage.
+  TrixModelInfo() : id(0), core(0), meta(nullptr), refCount(0) {}
+  TrixModelInfo(ModelID _id, const std::string &_path, int _core, npubin_meta *_meta, int _refCount)
+    : id(_id), path(_path), core(_core), meta(_meta), refCount(_refCount)
+  {
+  }
+  TrixModelInfo(const TrixModelInfo &) = delete;
+  TrixModelInfo &operator=(const TrixModelInfo &) = delete;
+  ~TrixModelInfo() { free(meta); }
+};
+
+/**
+ * @brief Trix request information
+ *
+ * The four buffer/info objects are allocated (value-initialised) together with
+ * the request and filled in by TrixBackend::setRequestData.
+ *
+ * @param id        The request identifier.
+ * @param modelId   The model id of request.
+ * @param inBufs    Input buffer list passed to the device.
+ * @param inInfos   Input tensor info list passed to the device.
+ * @param outBufs   Output buffer list passed to the device.
+ * @param outInfos  Output tensor info list passed to the device.
+ */
+struct TrixRequestInfo
+{
+  RequestID id;
+  ModelID modelId;
+  std::unique_ptr<input_buffers> inBufs = std::make_unique<input_buffers>();
+  std::unique_ptr<tensors_data_info> inInfos = std::make_unique<tensors_data_info>();
+  std::unique_ptr<output_buffers> outBufs = std::make_unique<output_buffers>();
+  std::unique_ptr<tensors_data_info> outInfos = std::make_unique<tensors_data_info>();
+
+  TrixRequestInfo(RequestID _id, ModelID _mid) : id(_id), modelId(_mid) {}
+};
+
+/**
+ * @brief Trix device information
+ *
+ * Holds all state shared by the contexts of one TrixBackend.
+ *
+ * @param handles The device handle list, indexed by core number.
+ * @param ctxs The NpuContext list; owns the contexts.
+ * @param models The model map, keyed by model id.
+ * @param requests The request map, keyed by request id.
+ */
+struct TrixDevice
+{
+ std::vector<Handle> handles;
+ std::vector<std::unique_ptr<NpuContext>> ctxs;
+ std::map<ModelID, std::unique_ptr<TrixModelInfo>> models;
+ std::map<RequestID, std::unique_ptr<TrixRequestInfo>> requests;
+};
+
+/**
+ * @brief Backend implementation on top of libnpuhost.
+ *
+ * Implements the npud::core::Backend interface. getVersion, createBuffer,
+ * destroyBuffer and submitRequest currently return
+ * NPU_STATUS_ERROR_NOT_SUPPORTED.
+ */
+class TrixBackend : public Backend
+{
+public:
+  TrixBackend();
+  // Backend declares a virtual destructor; say so explicitly.
+  ~TrixBackend() override;
+
+  NpuStatus getVersion(std::string &version) override;
+  NpuStatus createContext(int deviceId, int priority, NpuContext **ctx) override;
+  NpuStatus destroyContext(NpuContext *ctx) override;
+  NpuStatus createBuffer(NpuContext *ctx, GenericBuffer *buffer) override;
+  NpuStatus destroyBuffer(NpuContext *ctx, GenericBuffer *buffer) override;
+  // TODO Support to register model from buffer
+  NpuStatus registerModel(NpuContext *ctx, const std::string &modelPath, ModelID *modelId) override;
+  NpuStatus unregisterModel(NpuContext *ctx, ModelID modelId) override;
+  NpuStatus createRequest(NpuContext *ctx, ModelID modelId, RequestID *requestId) override;
+  NpuStatus destroyRequest(NpuContext *ctx, RequestID requestId) override;
+  NpuStatus setRequestData(NpuContext *ctx, RequestID requestId, InputBuffers *inputBufs,
+                           TensorDataInfos *inputInfos, OutputBuffers *outputBufs,
+                           TensorDataInfos *outputInfos) override;
+  NpuStatus submitRequest(NpuContext *ctx, RequestID requestId) override;
+
+private:
+  dev_type _devType;
+  // Device state; left null when the constructor could not open any device.
+  std::unique_ptr<TrixDevice> _dev;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_BACKEND_TRIX_BACKEND_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_BACKEND_H__
+#define __ONE_SERVICE_NPUD_CORE_BACKEND_H__
+
+#include "ir/Layout.h"
+#include "ir/DataType.h"
+
+#include <string>
+#include <vector>
+
+namespace npud
+{
+namespace core
+{
+
+// Capacity of the fixed-size arrays in GenericBuffers and TensorDataInfos.
+#define NPU_TENSOR_MAX (16)
+
+/**
+ * @brief Npu model ID.
+ *
+ * Filled in by Backend::registerModel and passed back to the other model calls.
+ */
+using ModelID = uint32_t;
+
+/**
+ * @brief Npu request ID.
+ *
+ * Filled in by Backend::createRequest and passed back to the other request calls.
+ */
+using RequestID = uint32_t;
+
+/**
+ * @brief Npu buffer type
+ *
+ * NOTE: TrixBackend::setRequestData casts these values directly to the
+ * libnpuhost buffer_types enum, so the numeric values must stay in step with it.
+ */
+enum BufferTypes
+{
+ NPU_BUFFER_MAPPED, /**< buffer is a memory-mapped ptr */
+ NPU_BUFFER_DMABUF, /**< buffer is a dmabuf fd, representing contiguous memory */
+ NPU_BUFFER_UNDEFINED /**< buffer type is undefined */
+};
+
+/**
+ * @brief Various kinds of buffer supported for input/output/model.
+ *
+ * NOTE(review): the members are grouped with anonymous structs, not unions, so
+ * addr, dmabuf and offset each have their own storage - confirm this is
+ * intended.
+ */
+struct GenericBuffer
+{
+ struct
+ { /** NPU_BUFFER_MAPPED/DMABUF */
+ void *addr; /**< Mapped address of the buffer */
+ struct
+ { /** NPU_BUFFER_DMABUF only */
+ int dmabuf; /**< The dma-buf fd handle of the memory allocated */
+ uint64_t offset; /**< Offset to be applied to the base memory address */
+ };
+ };
+ uint64_t size; /**< The size of the buffer in bytes */
+ BufferTypes type; /**< Type of memory in this buffer */
+};
+
+/**
+ * @brief Npu generic buffer array.
+ *
+ * Fixed-capacity list: at most NPU_TENSOR_MAX entries fit in @c buffers.
+ */
+struct GenericBuffers
+{
+ uint32_t numBuffers; /**< Number of entries of buffers that are in use */
+ GenericBuffer buffers[NPU_TENSOR_MAX];
+};
+
+/**
+ * @brief Npu input/output buffers are compatible with GenericBuffers.
+ *
+ * Both names are plain aliases of GenericBuffers.
+ */
+typedef GenericBuffers InputBuffers;
+typedef GenericBuffers OutputBuffers;
+
+/**
+ * @brief Npu tensor data info description.
+ *
+ * Describes one tensor by its layout and element data type.
+ */
+struct TensorDataInfo
+{
+ ir::Layout layout;
+ ir::DataType type;
+};
+
+/**
+ * @brief Npu tensor data info array.
+ *
+ * Fixed-capacity list: at most NPU_TENSOR_MAX entries fit in @c infos.
+ */
+struct TensorDataInfos
+{
+ uint32_t numInfos; /**< Number of entries of infos that are in use */
+ TensorDataInfo infos[NPU_TENSOR_MAX];
+};
+
+/**
+ * @brief Npu error status.
+ *
+ * Returned by every Backend operation; NPU_STATUS_SUCCESS is 0 and each error
+ * takes the next consecutive value.
+ */
+enum NpuStatus
+{
+ NPU_STATUS_SUCCESS = 0,
+ NPU_STATUS_ERROR_OPERATION_FAILED,
+ NPU_STATUS_ERROR_NOT_SUPPORTED,
+ NPU_STATUS_ERROR_INVALID_ARGUMENT,
+ NPU_STATUS_ERROR_INVALID_MODEL,
+ NPU_STATUS_ERROR_INVALID_DATA,
+};
+
+/**
+ * @brief Npu context definition
+ *
+ * Tracks what one client context has registered on a backend.
+ *
+ * @param models The model lists.
+ * @param requests The request lists.
+ * @param defaultCore The core number to be used by default.
+ */
+struct NpuContext
+{
+ std::vector<ModelID> models;
+ std::vector<RequestID> requests;
+ int defaultCore;
+};
+
+/**
+ * @brief Npu backend interface
+ *
+ * Backend module should implement this Backend interface.
+ * Npu daemon will load this class symbol at runtime.
+ *
+ * Every operation reports its outcome as an NpuStatus. Results are delivered
+ * through the pointer/reference parameters (version, ctx, modelId, requestId).
+ */
+class Backend
+{
+public:
+ virtual ~Backend() = default;
+
+ virtual NpuStatus getVersion(std::string &version) = 0;
+ virtual NpuStatus createContext(int deviceId, int priority, NpuContext **ctx) = 0;
+ virtual NpuStatus destroyContext(NpuContext *ctx) = 0;
+ virtual NpuStatus createBuffer(NpuContext *ctx, GenericBuffer *buffer) = 0;
+ virtual NpuStatus destroyBuffer(NpuContext *ctx, GenericBuffer *buffer) = 0;
+ // TODO Support to register model from buffer
+ virtual NpuStatus registerModel(NpuContext *ctx, const std::string &modelPath,
+ ModelID *modelId) = 0;
+ virtual NpuStatus unregisterModel(NpuContext *ctx, ModelID modelId) = 0;
+ virtual NpuStatus createRequest(NpuContext *ctx, ModelID modelId, RequestID *requestId) = 0;
+ virtual NpuStatus destroyRequest(NpuContext *ctx, RequestID requestId) = 0;
+ virtual NpuStatus setRequestData(NpuContext *ctx, RequestID requestId, InputBuffers *inputBufs,
+ TensorDataInfos *inputInfos, OutputBuffers *outputBufs,
+ TensorDataInfos *outputInfos) = 0;
+ virtual NpuStatus submitRequest(NpuContext *ctx, RequestID requestId) = 0;
+};
+
+// Function-pointer types for a backend factory pair: NpuAlloc returns a Backend
+// instance and NpuDealloc takes one back.
+// NOTE(review): presumably resolved from the backend module at load time - confirm in DevManager.
+typedef Backend *(*NpuAlloc)();
+typedef void (*NpuDealloc)(Backend *);
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_BACKEND_H__
--- /dev/null
+nnfw_find_package(GLib2.0 REQUIRED)
+
+# Every .cc file below this directory except main.cc goes into the static
+# library; main.cc alone builds the daemon executable.
+file(GLOB_RECURSE MAIN_SOURCE_FILE "main.cc")
+file(GLOB_RECURSE SOURCES "*.cc")
+list(REMOVE_ITEM SOURCES ${MAIN_SOURCE_FILE})
+
+# npud core library
+add_library(npud_core STATIC ${SOURCES})
+set_target_properties(npud_core PROPERTIES LINKER_LANGUAGE CXX)
+
+target_include_directories(npud_core PUBLIC
+  ${CMAKE_CURRENT_SOURCE_DIR}
+  ${GLIB2.0_INCLUDE_DIRS}
+  ${DBUS_INCLUDE_DIRS})
+
+target_link_libraries(npud_core PRIVATE
+  nnfw_lib_misc
+  ${GLIB2.0_LIBRARIES}
+  ${LIB_PTHREAD}
+  dl
+  npud_dbus)
+
+if(ENVVAR_NPUD_CONFIG)
+  target_compile_definitions(npud_core PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
+endif()
+
+# npud executable
+add_executable(npud ${MAIN_SOURCE_FILE})
+set_target_properties(npud PROPERTIES LINKER_LANGUAGE CXX)
+target_link_libraries(npud PRIVATE npud_core)
+
+install(TARGETS npud DESTINATION bin)
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ContextManager.h"
+
+#include <algorithm>
+#include <util/Logging.h>
+
+namespace npud
+{
+namespace core
+{
+
+// Starts with an empty context list.
+ContextManager::ContextManager() noexcept = default;
+
+// Drops every Context entry still registered.
+ContextManager::~ContextManager() noexcept
+{
+  _contexts.clear();
+}
+
+/**
+ * @brief Register a backend context and hand out an id for it.
+ *
+ * The id is the address of the newly allocated Context entry.
+ *
+ * @param npuContext Backend context to associate with the new id.
+ * @param contextId  Receives the new id.
+ */
+void ContextManager::newContext(NpuContext *npuContext, ContextID *contextId)
+{
+  std::unique_ptr<Context> entry(new Context());
+  // TODO Consider the possibility of reusing the same address.
+  const auto id = reinterpret_cast<ContextID>(entry.get());
+  entry->contextId = id;
+  entry->npuContext = npuContext;
+  *contextId = id;
+  _contexts.push_back(std::move(entry));
+
+  listContexts();
+}
+
+/**
+ * @brief Forget the entry registered under @p contextId.
+ *
+ * Unknown ids are ignored. The backend context itself is not destroyed here.
+ */
+void ContextManager::deleteContext(ContextID contextId)
+{
+  const auto countBefore = _contexts.size();
+  const auto hasId = [contextId](const std::unique_ptr<Context> &entry) {
+    return entry->contextId == contextId;
+  };
+  _contexts.erase(std::remove_if(_contexts.begin(), _contexts.end(), hasId), _contexts.end());
+
+  // Only report the list when something was actually removed.
+  if (_contexts.size() != countBefore)
+  {
+    listContexts();
+  }
+}
+
+// Debug helper: when DEBUG is defined, logs the number of registered contexts
+// and the id of each one through VERBOSE. Without DEBUG the body is empty.
+void ContextManager::listContexts()
+{
+#ifdef DEBUG
+ VERBOSE(ContextManager) << "Size: " << _contexts.size() << std::endl;
+ for (const auto &context : _contexts)
+ {
+ VERBOSE(ContextManager) << "==========================" << std::endl;
+ VERBOSE(ContextManager) << "contextId: " << context->contextId << std::endl;
+ }
+ VERBOSE(ContextManager) << "==========================" << std::endl;
+#endif
+}
+
+/**
+ * @brief Locate the entry registered under @p contextId.
+ *
+ * @return Iterator to the entry, or the end iterator of the context list when
+ *         the id is unknown.
+ */
+const std::vector<std::unique_ptr<Context>>::iterator
+ContextManager::getContext(ContextID contextId)
+{
+  const auto hasId = [contextId](const std::unique_ptr<Context> &entry) {
+    return entry->contextId == contextId;
+  };
+  return std::find_if(_contexts.begin(), _contexts.end(), hasId);
+}
+
+/**
+ * @brief Translate a context id into its backend context.
+ *
+ * @return The backend context stored for @p contextId, or nullptr when the id
+ *         is unknown.
+ */
+NpuContext *ContextManager::getNpuContext(ContextID contextId)
+{
+  const auto pos = getContext(contextId);
+  return (pos != _contexts.end()) ? (*pos)->npuContext : nullptr;
+}
+
+} // namespace core
+} // namespace npud
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_CONTEXT_MANAGER_H__
+#define __ONE_SERVICE_NPUD_CORE_CONTEXT_MANAGER_H__
+
+#include "Backend.h"
+
+#include <vector>
+#include <memory>
+
+namespace npud
+{
+namespace core
+{
+
+// Identifier handed out by ContextManager::newContext.
+using ContextID = uint64_t;
+// Pairs a ContextID with the backend context it stands for.
+struct Context
+{
+ // TODO Describe the variables
+ ContextID contextId; // id reported to the caller of newContext
+ NpuContext *npuContext; // backend context; raw pointer, not deleted by this struct
+};
+
+/**
+ * @brief Registry mapping ContextID values to backend NpuContext pointers.
+ *
+ * Non-copyable. It owns its Context entries but not the NpuContext objects
+ * they point to.
+ */
+class ContextManager
+{
+public:
+  ContextManager() noexcept;
+  ~ContextManager() noexcept;
+
+  ContextManager(const ContextManager &) = delete;
+  ContextManager &operator=(const ContextManager &) = delete;
+
+  // Registers npuContext and writes the id assigned to it into *contextId.
+  void newContext(NpuContext *npuContext, ContextID *contextId);
+  // Removes the entry with the given id; unknown ids are ignored.
+  void deleteContext(ContextID contextId);
+  // Returns an iterator to the entry, or the end iterator when not found.
+  const std::vector<std::unique_ptr<Context>>::iterator getContext(ContextID contextId);
+  // Returns the backend context for the id, or nullptr when not found.
+  NpuContext *getNpuContext(ContextID contextId);
+
+private:
+  // Debug-only dump of the registered ids.
+  void listContexts();
+
+  std::vector<std::unique_ptr<Context>> _contexts;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_CONTEXT_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Core.h"
+#include "util/Logging.h"
+
+namespace npud
+{
+namespace core
+{
+
+// Creates the device manager and the context manager owned by this Core.
+// Modules are not loaded here; see init().
+Core::Core() noexcept
+ : _devManager(std::make_unique<DevManager>()), _contextManager(std::make_unique<ContextManager>())
+{
+}
+
+// Delegates to DevManager::loadModules().
+void Core::init() { _devManager->loadModules(); }
+
+// Delegates to DevManager::releaseModules().
+void Core::deinit() { _devManager->releaseModules(); }
+
+// Not implemented yet: leaves `list` untouched and always returns 0.
+int Core::getAvailableDeviceList(std::vector<std::string> &list) const { return 0; }
+
+/**
+ * @brief Create a backend context and register it with the context manager.
+ *
+ * @param deviceId  Device to create the context on.
+ * @param priority  Priority forwarded to the device manager.
+ * @param contextId Receives the id of the new context on success.
+ * @return 0 on success, 1 when the device manager fails.
+ */
+int Core::createContext(int deviceId, int priority, ContextID *contextId) const
+{
+  VERBOSE(Core) << "createContext with " << deviceId << ", " << priority << std::endl;
+
+  NpuContext *devContext;
+  const int status = _devManager->createContext(deviceId, priority, &devContext);
+  if (status == NPU_STATUS_SUCCESS)
+  {
+    ContextID assignedId;
+    _contextManager->newContext(devContext, &assignedId);
+    *contextId = assignedId;
+    return 0;
+  }
+
+  VERBOSE(Core) << "Fail to create dev context" << std::endl;
+  // TODO Define CoreStatus
+  return 1;
+}
+
+/**
+ * @brief Destroy the backend context behind @p contextId and forget the id.
+ *
+ * @return 0 on success, 1 when the id is unknown or the device manager fails
+ *         (the id stays registered in the latter case).
+ */
+int Core::destroyContext(ContextID contextId) const
+{
+  VERBOSE(Core) << "destroyContext with " << contextId << std::endl;
+
+  NpuContext *const devContext = _contextManager->getNpuContext(contextId);
+  if (devContext == nullptr)
+  {
+    VERBOSE(Core) << "Invalid context id" << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  const int status = _devManager->destroyContext(devContext);
+  if (status == NPU_STATUS_SUCCESS)
+  {
+    _contextManager->deleteContext(contextId);
+    return 0;
+  }
+
+  VERBOSE(Core) << "Failed to destroy npu context: " << status << std::endl;
+  return 1;
+}
+
+/**
+ * @brief Register a model file for the given context.
+ *
+ * @param contextId Context to register the model in.
+ * @param modelPath Path of the model file.
+ * @param modelId   Receives the model id on success.
+ * @return 0 on success, 1 when the id is unknown or registration fails.
+ */
+int Core::createNetwork(ContextID contextId, const std::string &modelPath, ModelID *modelId) const
+{
+  VERBOSE(Core) << "createNetwork with " << contextId << ", " << modelPath << std::endl;
+
+  NpuContext *const devContext = _contextManager->getNpuContext(contextId);
+  if (devContext == nullptr)
+  {
+    VERBOSE(Core) << "Invalid context id" << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  ModelID registeredId;
+  const int status = _devManager->registerModel(devContext, modelPath, &registeredId);
+  if (status == NPU_STATUS_SUCCESS)
+  {
+    *modelId = registeredId;
+    return 0;
+  }
+
+  VERBOSE(Core) << "Failed to register model: " << status << std::endl;
+  return 1;
+}
+
+/**
+ * @brief Unregister a model from the given context.
+ *
+ * @return 0 on success, 1 when the id is unknown or unregistration fails.
+ */
+int Core::destroyNetwork(ContextID contextId, ModelID modelId) const
+{
+  VERBOSE(Core) << "destroyNetwork with " << contextId << std::endl;
+
+  NpuContext *const devContext = _contextManager->getNpuContext(contextId);
+  if (devContext == nullptr)
+  {
+    VERBOSE(Core) << "Invalid context id" << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  const int status = _devManager->unregisterModel(devContext, modelId);
+  if (status == NPU_STATUS_SUCCESS)
+  {
+    return 0;
+  }
+
+  VERBOSE(Core) << "Failed to unregister model: " << status << std::endl;
+  // TODO Define CoreStatus
+  return 1;
+}
+
+/**
+ * @brief Create a request for a model of the given context.
+ *
+ * @param contextId Context that registered the model.
+ * @param modelId   Model to create the request for.
+ * @param requestId Receives the request id on success.
+ * @return 0 on success, 1 when the id is unknown or creation fails.
+ */
+int Core::createRequest(ContextID contextId, ModelID modelId, RequestID *requestId) const
+{
+  VERBOSE(Core) << "createRequest with " << contextId << ", " << modelId << std::endl;
+
+  NpuContext *const devContext = _contextManager->getNpuContext(contextId);
+  if (devContext == nullptr)
+  {
+    VERBOSE(Core) << "Invalid context id" << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  RequestID createdId;
+  const int status = _devManager->createRequest(devContext, modelId, &createdId);
+  if (status == NPU_STATUS_SUCCESS)
+  {
+    *requestId = createdId;
+    return 0;
+  }
+
+  VERBOSE(Core) << "Failed to create request of model: " << status << std::endl;
+  // TODO Define CoreStatus
+  return 1;
+}
+
+/**
+ * @brief Destroy a request of the given context.
+ *
+ * @return 0 on success, 1 when the id is unknown or destruction fails.
+ */
+int Core::destroyRequest(ContextID contextId, RequestID requestId) const
+{
+  VERBOSE(Core) << "destroyRequest with " << contextId << ", " << requestId << std::endl;
+
+  NpuContext *const devContext = _contextManager->getNpuContext(contextId);
+  if (devContext == nullptr)
+  {
+    VERBOSE(Core) << "Invalid context id" << std::endl;
+    // TODO Define CoreStatus
+    return 1;
+  }
+
+  const int status = _devManager->destroyRequest(devContext, requestId);
+  if (status == NPU_STATUS_SUCCESS)
+  {
+    return 0;
+  }
+
+  VERBOSE(Core) << "Failed to destroy request: " << status << std::endl;
+  // TODO Define CoreStatus
+  return 1;
+}
+
+} // namespace core
+} // namespace npud
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_CORE_H__
+#define __ONE_SERVICE_NPUD_CORE_CORE_H__
+
+#include "DevManager.h"
+#include "ContextManager.h"
+
+#include <vector>
+#include <string>
+
+namespace npud
+{
+namespace core
+{
+
+// TODO Define error status
+
+// Facade of the npud core: combines the device manager (backend operations)
+// with the context manager (context id lookup). Non-copyable.
+//
+// The int-returning operations yield 0 on success; the ones implemented in
+// Core.cc return 1 on any failure.
+class Core
+{
+public:
+ Core() noexcept;
+ ~Core() noexcept = default;
+
+ Core(const Core &) = delete;
+ Core &operator=(const Core &) = delete;
+
+ // Load / release the backend modules through the device manager.
+ void init();
+ void deinit();
+
+ int getAvailableDeviceList(std::vector<std::string> &list) const;
+ int createContext(int deviceId, int priority, ContextID *contextId) const;
+ int destroyContext(ContextID contextId) const;
+ int createNetwork(ContextID contextId, const std::string &modelPath, ModelID *modelId) const;
+ int destroyNetwork(ContextID contextId, ModelID modelId) const;
+ int createRequest(ContextID contextId, ModelID modelId, RequestID *requestId) const;
+ int destroyRequest(ContextID contextId, RequestID requestId) const;
+
+private:
+ std::unique_ptr<DevManager> _devManager;
+ std::unique_ptr<ContextManager> _contextManager;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_CORE_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DBus.h"
+#include "Server.h"
+
+#include <atomic>
+#include <util/Logging.h>
+
+namespace npud
+{
+namespace core
+{
+
+// Set to true at the end of on_bus_acquired().
+std::atomic_bool DBus::_isReady(false);
+
+// Requests ownership of the name "org.tizen.npud" on the system bus and
+// installs the bus-acquired / name-acquired / name-lost callbacks. The owner id
+// is kept in _gdbus_id so the destructor can release the name.
+DBus::DBus() noexcept
+{
+ VERBOSE(DBus) << "Starting dbus service" << std::endl;
+
+ _gdbus_id = g_bus_own_name(G_BUS_TYPE_SYSTEM, "org.tizen.npud", G_BUS_NAME_OWNER_FLAGS_NONE,
+ (GBusAcquiredCallback)on_bus_acquired,
+ (GBusNameAcquiredCallback)on_name_acquired,
+ (GBusNameLostCallback)on_name_lost, NULL, NULL);
+}
+
+// Releases the bus name acquired in the constructor.
+DBus::~DBus() noexcept
+{
+ VERBOSE(DBus) << "Stop dbus service" << std::endl;
+
+ g_bus_unown_name(_gdbus_id);
+}
+
+/**
+ * @brief Bus-acquired callback: export the NpudCore interface.
+ *
+ * Creates the interface skeleton, installs one handler per method and exports
+ * it at "/org/tizen/npud". When the export fails the reason is logged and the
+ * server is asked to stop. The ready flag is set in either case.
+ *
+ * @param conn      Connection the skeleton is exported on.
+ * @param name      Bus name, used for logging only.
+ * @param user_data Unused.
+ */
+void DBus::on_bus_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data)
+{
+  VERBOSE(DBus) << name << " on bus acquired" << std::endl;
+
+  GError *error = NULL;
+  NpudCore *core = npud_core_skeleton_new();
+  NpudCoreIface *iface = NPUD_CORE_GET_IFACE(core);
+
+  iface->handle_device_get_available_list = &on_handle_device_get_available_list;
+  iface->handle_context_create = &on_handle_context_create;
+  iface->handle_context_destroy = &on_handle_context_destroy;
+  iface->handle_buffers_create = &on_handle_buffers_create;
+  iface->handle_buffers_destroy = &on_handle_buffers_destroy;
+  iface->handle_network_create = &on_handle_network_create;
+  iface->handle_network_destroy = &on_handle_network_destroy;
+  iface->handle_request_create = &on_handle_request_create;
+  iface->handle_request_destroy = &on_handle_request_destroy;
+  iface->handle_request_set_data = &on_handle_request_set_data;
+  iface->handle_execute_run = &on_handle_execute_run;
+
+  if (!g_dbus_interface_skeleton_export(G_DBUS_INTERFACE_SKELETON(core), conn, "/org/tizen/npud",
+                                        &error))
+  {
+    VERBOSE(DBus) << "Failed to export skeleton, Server will stop." << std::endl;
+    // A GError set by a failing call belongs to the caller: report and free it.
+    if (error != NULL)
+    {
+      VERBOSE(DBus) << "Export error: " << error->message << std::endl;
+      g_error_free(error);
+    }
+    Server::instance().stop();
+  }
+
+  _isReady.exchange(true);
+}
+
+// Name-acquired callback: only logs the acquired name.
+void DBus::on_name_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data)
+{
+ VERBOSE(DBus) << name << " on name acquired" << std::endl;
+}
+
+// Name-lost callback: logs the event and asks the server to stop.
+void DBus::on_name_lost(GDBusConnection *conn, const gchar *name, gpointer user_data)
+{
+ VERBOSE(DBus) << name << " on name lost, Server will stop." << std::endl;
+ Server::instance().stop();
+}
+
+// Handler for the device_get_available_list method. Queries Core for the device
+// list and completes the invocation with the returned error code only; the
+// list itself is not sent back yet (see TODO). Always returns TRUE.
+gboolean DBus::on_handle_device_get_available_list(NpudCore *object,
+ GDBusMethodInvocation *invocation)
+{
+ VERBOSE(DBus) << __FUNCTION__ << std::endl;
+ std::vector<std::string> list;
+ int error = Server::instance().core().getAvailableDeviceList(list);
+ // TODO Implement variant outputs
+ npud_core_complete_device_get_available_list(object, invocation, error);
+ return TRUE;
+}
+
+/**
+ * @brief Handler for the context_create method.
+ *
+ * Forwards the device id and priority to Core::createContext and completes
+ * the invocation with the resulting context id and status code.
+ *
+ * @return Always TRUE.
+ */
+gboolean DBus::on_handle_context_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                        gint arg_device_id, gint arg_priority)
+{
+  VERBOSE(DBus) << "on_handle_context_create with " << arg_device_id << ", " << arg_priority
+                << std::endl;
+
+  // Stays 0 when the core does not assign an id.
+  guint64 contextHandle = 0;
+  const int status =
+    Server::instance().core().createContext(arg_device_id, arg_priority, &contextHandle);
+  npud_core_complete_context_create(object, invocation, contextHandle, status);
+  return TRUE;
+}
+
+/**
+ * @brief Handler for the context_destroy method.
+ *
+ * Forwards the context id to Core::destroyContext and completes the
+ * invocation with the resulting status code.
+ *
+ * @return Always TRUE.
+ */
+gboolean DBus::on_handle_context_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                         guint64 arg_ctx)
+{
+  VERBOSE(DBus) << "on_handle_context_destroy with " << arg_ctx << std::endl;
+
+  const int status = Server::instance().core().destroyContext(arg_ctx);
+  npud_core_complete_context_destroy(object, invocation, status);
+  return TRUE;
+}
+
+/**
+ * @brief Handler for the buffers_create method.
+ *
+ * Decodes the "a(itu)" array (type, address, size) sent by the client into a
+ * GenericBuffers list. The core call is not implemented yet, so the invocation
+ * is always completed with an empty array and the status -1.
+ *
+ * An array with more than NPU_TENSOR_MAX entries is not decoded, because it
+ * would not fit into GenericBuffers.
+ *
+ * @return Always TRUE.
+ */
+gboolean DBus::on_handle_buffers_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                        guint64 arg_ctx, GVariant *arg_buffers)
+{
+  VERBOSE(DBus) << "on_handle_buffers_create with " << arg_ctx << std::endl;
+  GenericBuffers bufs;
+  GVariantIter *iter = NULL;
+  gint32 type;
+  guint64 addr;
+  guint32 size;
+  int index = 0;
+  g_variant_get(arg_buffers, "a(itu)", &iter);
+
+  // The array length is chosen by the remote caller; never write past the
+  // fixed-size bufs.buffers array.
+  const bool tooMany =
+    (iter != NULL) && (g_variant_iter_n_children(iter) > static_cast<gsize>(NPU_TENSOR_MAX));
+  if (tooMany)
+  {
+    VERBOSE(DBus) << "Too many buffers, at most " << NPU_TENSOR_MAX << " are supported"
+                  << std::endl;
+  }
+
+  while (iter != NULL && !tooMany && g_variant_iter_loop(iter, "(itu)", &type, &addr, &size))
+  {
+    VERBOSE(DBus) << "in [" << index << "] Type: " << type << ", Addr: " << addr
+                  << ", Size: " << size << std::endl;
+    bufs.buffers[index].type = static_cast<BufferTypes>(type);
+    bufs.buffers[index].addr = reinterpret_cast<void *>(addr);
+    bufs.buffers[index].size = size;
+    index++;
+  }
+  bufs.numBuffers = index;
+  if (iter != NULL)
+  {
+    g_variant_iter_free(iter);
+  }
+
+  // TODO Invoke Core function (and skip it when tooMany is set).
+  int ret = -1;
+
+  GVariantBuilder *builder = g_variant_builder_new(G_VARIANT_TYPE("a(itu)"));
+
+  // TODO Enable below code when we can update ret value by core function
+  // if (ret == 0)
+  // {
+  //   for (auto i = 0; i < bufs.numBuffers; ++i)
+  //   {
+  //     VERBOSE(DBus) << "out [" << index << "] Type: " << bufs.buffers[i].type
+  //                   << ", Addr: " << bufs.buffers[i].addr << ", Size: " << bufs.buffers[i].size
+  //                   << std::endl;
+  //     g_variant_builder_add(builder, "(itu)", bufs.buffers[i].type, bufs.buffers[i].addr,
+  //                           bufs.buffers[i].size);
+  //   }
+  // }
+  GVariant *outBuffers = g_variant_builder_end(builder);
+  // A builder obtained from g_variant_builder_new() must be unreferenced.
+  g_variant_builder_unref(builder);
+  npud_core_complete_buffers_create(object, invocation, outBuffers, ret);
+  return TRUE;
+}
+
+// Handler for the buffers_destroy method.
+//
+// Unpacks the incoming "a(itu)" array (type, address, size) into a
+// GenericBuffers table. The core call is not wired up yet, so the reply
+// always carries status -1.
+//
+// Always returns TRUE to tell GDBus the invocation has been handled.
+gboolean DBus::on_handle_buffers_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                          guint64 arg_ctx, GVariant *arg_buffers)
+{
+  VERBOSE(DBus) << "on_handle_buffers_destroy with " << arg_ctx << std::endl;
+  GenericBuffers bufs;
+  // Number of slots in the buffer table.
+  // NOTE(review): assumes `buffers` is an inline fixed-size array member - confirm.
+  const int capacity = static_cast<int>(sizeof(bufs.buffers) / sizeof(bufs.buffers[0]));
+  GVariantIter *iter = NULL;
+  gint32 type;
+  guint64 addr;
+  guint32 size;
+  int index = 0;
+  g_variant_get(arg_buffers, "a(itu)", &iter);
+  while (iter != NULL && g_variant_iter_loop(iter, "(itu)", &type, &addr, &size))
+  {
+    // The array length comes from the D-Bus client; never write past the table.
+    // Leaving the loop early is safe because "(itu)" unpacks only basic types.
+    if (index >= capacity)
+    {
+      VERBOSE(DBus) << "Too many buffers. Only " << capacity << " are accepted." << std::endl;
+      break;
+    }
+    VERBOSE(DBus) << "[" << index << "] Type: " << type << ", Addr: " << (void *)addr
+                  << ", Size: " << size << std::endl;
+    bufs.buffers[index].type = static_cast<BufferTypes>(type);
+    bufs.buffers[index].addr = reinterpret_cast<void *>(addr);
+    bufs.buffers[index].size = size;
+    index++;
+  }
+  bufs.numBuffers = index;
+  if (iter != NULL)
+  {
+    g_variant_iter_free(iter);
+  }
+  // TODO Invoke Core function.
+  int ret = -1;
+  npud_core_complete_buffers_destroy(object, invocation, ret);
+  return TRUE;
+}
+
+// Handler for the network_create method.
+// Passes the context handle and model path to the core and replies with the
+// model id (narrowed to guint) and the status code.
+gboolean DBus::on_handle_network_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                         guint64 arg_ctx, const gchar *arg_model_path)
+{
+  VERBOSE(DBus) << "on_handle_network_create with " << arg_ctx << ", " << arg_model_path
+                << std::endl;
+
+  ModelID createdModel = 0;
+  const auto &npuCore = Server::instance().core();
+  const int status = npuCore.createNetwork(arg_ctx, arg_model_path, &createdModel);
+
+  npud_core_complete_network_create(object, invocation, guint(createdModel), status);
+  return TRUE;
+}
+
+// Handler for the network_destroy method.
+// Asks the core to destroy the network handle within the given context and
+// replies with the status code.
+gboolean DBus::on_handle_network_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                          guint64 arg_ctx, guint arg_nw_handle)
+{
+  VERBOSE(DBus) << "on_handle_network_destroy with " << arg_ctx << ", " << arg_nw_handle
+                << std::endl;
+
+  const auto &npuCore = Server::instance().core();
+  const int status = npuCore.destroyNetwork(arg_ctx, arg_nw_handle);
+
+  npud_core_complete_network_destroy(object, invocation, status);
+  return TRUE;
+}
+
+// Handler for the request_create method.
+// Asks the core to create a request for the given network handle and replies
+// with the request id (narrowed to guint) and the status code.
+gboolean DBus::on_handle_request_create(NpudCore *object, GDBusMethodInvocation *invocation,
+                                         guint64 arg_ctx, guint arg_nw_handle)
+{
+  VERBOSE(DBus) << "on_handle_request_create with " << arg_ctx << ", " << arg_nw_handle
+                << std::endl;
+
+  RequestID createdRequest = 0;
+  const auto &npuCore = Server::instance().core();
+  const int status = npuCore.createRequest(arg_ctx, arg_nw_handle, &createdRequest);
+
+  npud_core_complete_request_create(object, invocation, guint(createdRequest), status);
+  return TRUE;
+}
+
+// Handler for the request_destroy method.
+// Asks the core to destroy the request handle within the given context and
+// replies with the status code.
+gboolean DBus::on_handle_request_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+                                          guint64 arg_ctx, guint arg_rq_handle)
+{
+  VERBOSE(DBus) << "on_handle_request_destroy with " << arg_ctx << ", " << arg_rq_handle
+                << std::endl;
+
+  const auto &npuCore = Server::instance().core();
+  const int status = npuCore.destroyRequest(arg_ctx, arg_rq_handle);
+
+  npud_core_complete_request_destroy(object, invocation, status);
+  return TRUE;
+}
+
+// Handler for the request_set_data method.
+//
+// Unpacks the input and output "a(itu)" arrays (type, address, size) into
+// InputBuffers / OutputBuffers tables. Only type 0 (NPU_BUFFER_MAPPED) entries
+// are stored; type 1 (NPU_BUFFER_DMABUF) is not implemented yet and any other
+// type is ignored. The core call is not wired up yet, so the reply always
+// carries status -1.
+//
+// Always returns TRUE to tell GDBus the invocation has been handled.
+gboolean DBus::on_handle_request_set_data(NpudCore *object, GDBusMethodInvocation *invocation,
+                                           guint64 arg_ctx, guint arg_rq_handle,
+                                           GVariant *arg_input_buffers, GVariant *arg_output_buffers)
+{
+  VERBOSE(DBus) << "on_handle_request_set_data with " << arg_ctx << ", " << arg_rq_handle
+                << std::endl;
+
+  // Shared unpacking logic for both directions; `tag` is the log prefix
+  // ("in" or "out"). Generic so that it works whether or not InputBuffers and
+  // OutputBuffers are the same type.
+  auto unpack = [](GVariant *variant, auto &bufs, const char *tag) {
+    // Number of slots in the buffer table.
+    // NOTE(review): assumes `buffers` is an inline fixed-size array member - confirm.
+    const int capacity = static_cast<int>(sizeof(bufs.buffers) / sizeof(bufs.buffers[0]));
+    GVariantIter *iter = NULL;
+    gint32 type;
+    guint64 addr;
+    guint32 size;
+    int index = 0;
+
+    g_variant_get(variant, "a(itu)", &iter);
+    while (iter != NULL && g_variant_iter_loop(iter, "(itu)", &type, &addr, &size))
+    {
+      VERBOSE(DBus) << tag << " [" << index << "] Type: " << type << ", Addr: " << (void *)addr
+                    << ", Size: " << size << std::endl;
+      if (type == 1) // NPU_BUFFER_DMABUF
+      {
+        // TODO Support dma buffer
+        VERBOSE(DBus) << "[NYI] NPU_BUFFER_DMABUF" << std::endl;
+        continue;
+      }
+      if (type != 0) // anything other than NPU_BUFFER_MAPPED
+      {
+        VERBOSE(DBus) << "Wrong buffer type. Ignored." << std::endl;
+        continue;
+      }
+      // The array length comes from the D-Bus client; never write past the table.
+      // Leaving the loop early is safe because "(itu)" unpacks only basic types.
+      if (index >= capacity)
+      {
+        VERBOSE(DBus) << "Too many buffers. Only " << capacity << " are accepted." << std::endl;
+        break;
+      }
+      bufs.buffers[index].addr = reinterpret_cast<void *>(addr);
+      bufs.buffers[index].size = size;
+      bufs.buffers[index].type = static_cast<BufferTypes>(type);
+      index++;
+    }
+    bufs.numBuffers = index;
+    if (iter != NULL)
+    {
+      g_variant_iter_free(iter);
+    }
+  };
+
+  InputBuffers inBufs;
+  OutputBuffers outBufs;
+  unpack(arg_input_buffers, inBufs, "in");
+  unpack(arg_output_buffers, outBufs, "out");
+
+  // TODO Invoke Core function.
+  int ret = -1;
+  npud_core_complete_request_set_data(object, invocation, ret);
+  return TRUE;
+}
+
+// Handler for the execute_run method.
+// The core call is not wired up yet, so the reply always carries status -1.
+gboolean DBus::on_handle_execute_run(NpudCore *object, GDBusMethodInvocation *invocation,
+                                      guint64 arg_ctx, guint arg_rq_handle)
+{
+  VERBOSE(DBus) << "on_handle_execute_run with " << arg_ctx << ", " << arg_rq_handle << std::endl;
+
+  // TODO Invoke Core function.
+  const int status = -1;
+  npud_core_complete_execute_run(object, invocation, status);
+  return TRUE;
+}
+
+} // namespace core
+} // namespace npud
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_DBUS_H__
+#define __ONE_SERVICE_NPUD_CORE_DBUS_H__
+
+#include <dbus-core.h>
+#include <gio/gio.h>
+#include <memory>
+#include <atomic>
+
+namespace npud
+{
+namespace core
+{
+
+// D-Bus front end of the npud service.
+//
+// Declares the static callbacks for bus/name events and one static handler per
+// method of the NpudCore interface. Each handler receives the skeleton object
+// and the pending invocation, and returns a gboolean. The class is
+// non-copyable.
+class DBus
+{
+public:
+ DBus() noexcept;
+ ~DBus() noexcept;
+
+ DBus(const DBus &) = delete;
+ DBus &operator=(const DBus &) = delete;
+
+ // Returns the current value of the shared readiness flag.
+ bool isReady() { return _isReady.load(); }
+
+ // Bus / name ownership callbacks.
+ static void on_bus_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data);
+ static void on_name_acquired(GDBusConnection *conn, const gchar *name, gpointer user_data);
+ static void on_name_lost(GDBusConnection *conn, const gchar *name, gpointer user_data);
+
+ // Method handlers, one per NpudCore interface method.
+ static gboolean on_handle_device_get_available_list(NpudCore *object,
+ GDBusMethodInvocation *invocation);
+ static gboolean on_handle_context_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ gint arg_device_id, gint arg_priority);
+ static gboolean on_handle_context_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx);
+ static gboolean on_handle_buffers_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, GVariant *arg_buffers);
+ static gboolean on_handle_buffers_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, GVariant *arg_buffers);
+ static gboolean on_handle_network_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, const gchar *arg_model_path);
+ static gboolean on_handle_network_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_nw_handle);
+ static gboolean on_handle_request_create(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_nw_handle);
+ static gboolean on_handle_request_destroy(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_rq_handle);
+ static gboolean on_handle_request_set_data(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_rq_handle,
+ GVariant *arg_input_buffers,
+ GVariant *arg_output_buffers);
+ static gboolean on_handle_execute_run(NpudCore *object, GDBusMethodInvocation *invocation,
+ guint64 arg_ctx, guint arg_rq_handle);
+
+private:
+ // NOTE(review): presumably the bus-name owner id obtained in the constructor - confirm.
+ guint _gdbus_id;
+ // Static so that the static callbacks can access it; read by isReady().
+ static std::atomic_bool _isReady;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_DBUS_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DevManager.h"
+#include "util/Logging.h"
+
+#include <dirent.h>
+
+namespace npud
+{
+namespace core
+{
+
+// Reads the module directory from the DEVICE_MODULE_PATH configuration entry.
+//
+// The value is stored directly in the member initializer: the previous
+// `std::move` of a `const` local could not move and silently copied instead.
+DevManager::DevManager() : _module_dir(util::getConfigString(util::config::DEVICE_MODULE_PATH))
+{
+}
+
+// Scans the module directory and loads the first usable backend module.
+//
+// Any previously loaded module is released first. Directory entries whose name
+// does not contain "npud_backend" are skipped, as are modules that fail to
+// load. On success `_dev` owns the loader; if the directory cannot be opened
+// or no module loads, `_dev` stays empty.
+void DevManager::loadModules()
+{
+  VERBOSE(DevManager) << "load modules from " << _module_dir << std::endl;
+
+  releaseModules();
+
+  // NOTE
+  // opendir and readdir are C APIs that report failure by returning NULL(0),
+  // so NULL is used instead of nullptr.
+  DIR *dir = opendir(_module_dir.c_str());
+  if (dir == NULL)
+  {
+    VERBOSE(DevManager) << "Fail to open module directory" << std::endl;
+    return;
+  }
+
+  struct dirent *entry;
+  while ((entry = readdir(dir)) != NULL)
+  {
+    const std::string fileName(entry->d_name);
+    if (fileName.find("npud_backend") == std::string::npos)
+    {
+      continue;
+    }
+
+    // dlopen() treats a name without '/' as a library to look up in the
+    // system search path, so the bare entry name would not be loaded from the
+    // directory that was just scanned. Pass the full path instead.
+    std::string modulePath = _module_dir + "/" + fileName;
+
+    std::unique_ptr<DynamicLoader> loader;
+    try
+    {
+      loader = std::make_unique<DynamicLoader>(modulePath.c_str());
+    }
+    catch (const std::exception &e)
+    {
+      VERBOSE(DevManager) << e.what() << std::endl;
+      continue;
+    }
+
+    auto dev = std::make_unique<Device>();
+    dev->modulePath = std::move(modulePath);
+    dev->loader = std::move(loader);
+
+    // Only a single backend is supported: keep the first one that loads.
+    _dev = std::move(dev);
+    break;
+  }
+
+  closedir(dir);
+}
+
+// Drops the currently loaded device, if any.
+void DevManager::releaseModules()
+{
+  // reset() on an empty unique_ptr is a no-op, so no emptiness check is needed.
+  _dev.reset();
+}
+
+// Returns the backend instance of the loaded device.
+// Throws std::runtime_error when no device has been loaded.
+std::shared_ptr<Backend> DevManager::getBackend()
+{
+  if (_dev)
+  {
+    return _dev->loader->getInstance();
+  }
+
+  throw std::runtime_error("No backend device.");
+}
+
+// Forwards context creation to the backend.
+// Returns the backend's status, or NPU_STATUS_ERROR_OPERATION_FAILED when a
+// std::exception is thrown (for example when no backend is loaded).
+int DevManager::createContext(int deviceId, int priority, NpuContext **npuContext)
+{
+  int status = NPU_STATUS_ERROR_OPERATION_FAILED;
+  try
+  {
+    status = getBackend()->createContext(deviceId, priority, npuContext);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+  }
+  return status;
+}
+
+// Forwards context destruction to the backend.
+// Returns the backend's status, or NPU_STATUS_ERROR_OPERATION_FAILED when a
+// std::exception is thrown (for example when no backend is loaded).
+int DevManager::destroyContext(NpuContext *npuContext)
+{
+  int status = NPU_STATUS_ERROR_OPERATION_FAILED;
+  try
+  {
+    status = getBackend()->destroyContext(npuContext);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+  }
+  return status;
+}
+
+// Forwards model registration to the backend.
+// Returns the backend's status, or NPU_STATUS_ERROR_OPERATION_FAILED when a
+// std::exception is thrown (for example when no backend is loaded).
+int DevManager::registerModel(NpuContext *npuContext, const std::string &modelPath,
+                              ModelID *modelId)
+{
+  int status = NPU_STATUS_ERROR_OPERATION_FAILED;
+  try
+  {
+    status = getBackend()->registerModel(npuContext, modelPath, modelId);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+  }
+  return status;
+}
+
+// Forwards model unregistration to the backend.
+// Returns the backend's status, or NPU_STATUS_ERROR_OPERATION_FAILED when a
+// std::exception is thrown (for example when no backend is loaded).
+int DevManager::unregisterModel(NpuContext *npuContext, ModelID modelId)
+{
+  int status = NPU_STATUS_ERROR_OPERATION_FAILED;
+  try
+  {
+    status = getBackend()->unregisterModel(npuContext, modelId);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+  }
+  return status;
+}
+
+// Forwards request creation to the backend.
+// Returns the backend's status, or NPU_STATUS_ERROR_OPERATION_FAILED when a
+// std::exception is thrown (for example when no backend is loaded).
+int DevManager::createRequest(NpuContext *npuContext, ModelID modelId, RequestID *requestId)
+{
+  int status = NPU_STATUS_ERROR_OPERATION_FAILED;
+  try
+  {
+    status = getBackend()->createRequest(npuContext, modelId, requestId);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+  }
+  return status;
+}
+
+// Forwards request destruction to the backend.
+// Returns the backend's status, or NPU_STATUS_ERROR_OPERATION_FAILED when a
+// std::exception is thrown (for example when no backend is loaded).
+int DevManager::destroyRequest(NpuContext *npuContext, RequestID requestId)
+{
+  int status = NPU_STATUS_ERROR_OPERATION_FAILED;
+  try
+  {
+    status = getBackend()->destroyRequest(npuContext, requestId);
+  }
+  catch (const std::exception &e)
+  {
+    VERBOSE(DevManager) << e.what() << std::endl;
+  }
+  return status;
+}
+
+} // namespace core
+} // namespace npud
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_DEV_MANAGER_H__
+#define __ONE_SERVICE_NPUD_CORE_DEV_MANAGER_H__
+
+#include "DynamicLoader.h"
+
+#include <memory>
+
+namespace npud
+{
+namespace core
+{
+
+// A loaded backend module: the path it was loaded from and the loader that
+// owns it.
+struct Device
+{
+ // Path string recorded when the module was loaded.
+ std::string modulePath;
+ // Owns the dynamic-library loader for this module.
+ std::unique_ptr<DynamicLoader> loader;
+};
+
+// Manages the backend device module and forwards NPU operations to it.
+//
+// Holds at most one Device. The class is non-copyable.
+class DevManager
+{
+public:
+ DevManager();
+ ~DevManager() = default;
+
+ DevManager(const DevManager &) = delete;
+ DevManager &operator=(const DevManager &) = delete;
+
+ // Module lifecycle.
+ void loadModules();
+ void releaseModules();
+ // Returns the backend of the loaded device.
+ std::shared_ptr<Backend> getBackend();
+
+ // Backend operations; each returns an int status code.
+ int createContext(int deviceId, int priority, NpuContext **npuContext);
+ int destroyContext(NpuContext *npuContext);
+ int registerModel(NpuContext *npuContext, const std::string &modelPath, ModelID *modelId);
+ int unregisterModel(NpuContext *npuContext, ModelID modelId);
+ int createRequest(NpuContext *npuContext, ModelID modelId, RequestID *requestId);
+ int destroyRequest(NpuContext *npuContext, RequestID requestId);
+
+private:
+ // Currently loaded device; empty when nothing is loaded.
+ std::unique_ptr<Device> _dev;
+ // Directory that is scanned for backend modules.
+ std::string _module_dir;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_DEV_MANAGER_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DynamicLoader.h"
+
+#include "util/Logging.h"
+
+namespace npud
+{
+namespace core
+{
+
+// Opens the shared library `file` with dlopen(`flags`), resolves its
+// "allocate" and "deallocate" symbols and creates the backend instance.
+//
+// The backend is owned by a shared_ptr whose deleter calls the library's
+// "deallocate" function.
+//
+// Throws std::runtime_error when the library cannot be opened, when either
+// symbol is missing, or when the library's allocator returns a null backend.
+// The library handle is closed before throwing, because the destructor does
+// not run for an object whose constructor threw.
+DynamicLoader::DynamicLoader(const char *file, int flags)
+  : _handle(nullptr), _filepath(file), _allocSymbol("allocate"), _deallocSymbol("deallocate")
+{
+  _handle = dlopen(_filepath.c_str(), flags);
+  if (!_handle)
+  {
+    // dlerror() may return NULL; streaming a null char pointer is undefined.
+    const char *reason = dlerror();
+    VERBOSE(DynamicLoader) << "Fail to load " << _filepath
+                           << " module: " << (reason ? reason : "unknown error") << std::endl;
+    throw std::runtime_error("Fail to load " + _filepath + " module");
+  }
+
+  auto alloc = reinterpret_cast<NpuAlloc>(dlsym(_handle, _allocSymbol.c_str()));
+  auto dealloc = reinterpret_cast<NpuDealloc>(dlsym(_handle, _deallocSymbol.c_str()));
+  if (!alloc || !dealloc)
+  {
+    // A symbol whose value is NULL makes dlsym() return NULL without an error,
+    // in which case dlerror() returns NULL as well.
+    const char *reason = dlerror();
+    VERBOSE(DynamicLoader) << "Fail to load " << _filepath
+                           << " symbol: " << (reason ? reason : "unknown error") << std::endl;
+    dlclose(_handle);
+    throw std::runtime_error("Fail to load " + _filepath + " module");
+  }
+
+  // Reject a null backend here rather than letting callers dereference it later.
+  auto *backend = alloc();
+  if (!backend)
+  {
+    VERBOSE(DynamicLoader) << "Fail to allocate backend from " << _filepath << std::endl;
+    dlclose(_handle);
+    throw std::runtime_error("Fail to load " + _filepath + " module");
+  }
+
+  _backend = std::shared_ptr<Backend>(backend, [dealloc](Backend *b) { dealloc(b); });
+}
+
+// Releases the backend reference and then closes the library handle.
+DynamicLoader::~DynamicLoader()
+{
+  // NOTE
+  // Order matters: this object's reference to the backend has to be dropped
+  // while the library is still open, before the handle is released.
+  _backend = nullptr;
+  dlclose(_handle);
+}
+
+// Returns a shared reference to the backend created by this loader.
+std::shared_ptr<Backend> DynamicLoader::getInstance()
+{
+  return _backend;
+}
+
+} // namespace core
+} // namespace npud
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_DYNAMIC_LOADER_H__
+#define __ONE_SERVICE_NPUD_CORE_DYNAMIC_LOADER_H__
+
+#include "Backend.h"
+
+#include <dlfcn.h>
+#include <string>
+#include <memory>
+
+namespace npud
+{
+namespace core
+{
+
+using DLHandle = void *;
+
+// Loads a backend module from a shared library and owns the resulting
+// Backend instance together with the library handle.
+//
+// The class is non-copyable: a copy would share the raw library handle and
+// close it twice. Copy assignment is deleted alongside the copy constructor
+// for that reason.
+class DynamicLoader
+{
+public:
+  // Opens `file` with dlopen() using `flags`.
+  DynamicLoader(const char *file, int flags = RTLD_LAZY);
+  ~DynamicLoader();
+
+  DynamicLoader(const DynamicLoader &) = delete;
+  DynamicLoader &operator=(const DynamicLoader &) = delete;
+
+  // Returns a shared reference to the backend instance.
+  std::shared_ptr<Backend> getInstance();
+
+private:
+  // Library handle.
+  DLHandle _handle;
+  // Path string the library was opened with.
+  std::string _filepath;
+  // Names of the symbols used to create and destroy the backend.
+  std::string _allocSymbol;
+  std::string _deallocSymbol;
+  // Backend instance.
+  std::shared_ptr<Backend> _backend;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_DYNAMIC_LOADER_H__
*/
#include "Server.h"
+#include "util/Logging.h"
#include <thread>
-#include <util/Logging.h>
namespace npud
{
std::atomic_bool Server::_isRunning(false);
Server::Server() noexcept
- : _mainloop(g_main_loop_new(NULL, FALSE), g_main_loop_unref), _signal(std::make_unique<Signal>())
+ : _mainloop(g_main_loop_new(NULL, FALSE), g_main_loop_unref), _signal(std::make_unique<Signal>()),
+ _core(std::make_unique<Core>()), _dbus(std::make_unique<DBus>())
{
}
+// Reports whether the service can accept requests: the server must be marked
+// as running and the D-Bus front end must report ready. Logs which of the two
+// conditions is not met.
+bool Server::isServiceReady()
+{
+  const bool running = _isRunning.load();
+  if (!running)
+  {
+    VERBOSE(Server) << "Server is not started." << std::endl;
+    return false;
+  }
+
+  const bool dbusReady = _dbus->isReady();
+  if (!dbusReady)
+  {
+    VERBOSE(Server) << "DBus service is not ready." << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
void Server::run(void)
{
VERBOSE(Server) << "Starting Server\n";
if (_isRunning.exchange(true))
{
- throw std::runtime_error("Mainloop is already running.");
+ return;
}
+ _core->init();
+
g_main_loop_run(_mainloop.get());
}
if (!_isRunning.load())
{
- throw std::runtime_error("Mainloop is not running");
+ return;
}
while (!g_main_loop_is_running(_mainloop.get()))
std::this_thread::yield();
}
+ _core->deinit();
+
g_main_loop_quit(_mainloop.get());
_isRunning = false;
}
#define __ONE_SERVICE_NPUD_CORE_SERVER_H__
#include "Signal.h"
+#include "Core.h"
+#include "DBus.h"
#include <glib.h>
#include <memory>
class Server
{
public:
+ Server(const Server &) = delete;
+ Server &operator=(const Server &) = delete;
+
void run(void);
void stop(void);
+ bool isRunning() { return _isRunning.load(); }
+ bool isServiceReady();
+
static Server &instance(void)
{
static Server server;
return server;
}
+ const Core &core(void) { return *_core.get(); }
+
private:
Server() noexcept;
std::unique_ptr<GMainLoop, void (*)(GMainLoop *)> _mainloop;
std::unique_ptr<Signal> _signal;
+ std::unique_ptr<Core> _core;
+ std::unique_ptr<DBus> _dbus;
};
} // namespace core
*/
#include "Signal.h"
-
#include "Server.h"
-#include <util/Logging.h>
+#include "util/Logging.h"
#include <csignal>
{
// NOTE Types of signals
// SIGTERM: termination request, sent to the program
- // SIGSEGV: invalid memory access (segmentation fault)
// SIGINT: external interrupt, usually initiated by the user
// SIGILL: invalid program image, such as invalid instruction
// SIGABRT: abnormal termination condition, as is e.g. initiated by std::abort()
// SIGFPE: erroneous arithmetic operation such as divide by zero
// from https://en.cppreference.com/w/cpp/utility/program/SIG_types
std::signal(SIGTERM, handleSignal);
- std::signal(SIGSEGV, handleSignal);
std::signal(SIGINT, handleSignal);
std::signal(SIGILL, handleSignal);
std::signal(SIGABRT, handleSignal);
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_IR_DATATYPE_H__
+#define __ONE_SERVICE_NPUD_CORE_IR_DATATYPE_H__
+
+#include <cstdlib>
+
+namespace npud
+{
+namespace core
+{
+namespace ir
+{
+
+// Element data types.
+// INT8 is pinned to 0; the following enumerators take consecutive values, so
+// their order is part of the numeric encoding.
+enum class DataType
+{
+ INT8 = 0,
+ UINT8,
+ QUANT_UINT8_ASYMM,
+ INT16,
+ UINT16,
+ QUANT_INT16_SYMM,
+ INT32,
+ UINT32,
+ FLOAT32,
+ INT64,
+ UINT64,
+ FLOAT64,
+};
+
+} // namespace ir
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_IR_DATATYPE_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_IR_LAYOUT_H__
+#define __ONE_SERVICE_NPUD_CORE_IR_LAYOUT_H__
+
+#include <functional>
+#include <stdexcept>
+#include <string>
+
+namespace npud
+{
+namespace core
+{
+namespace ir
+{
+
+// Data layout identifiers.
+// UNKNOWN is pinned to 0; NHWC and NCHW take the following values.
+enum class Layout
+{
+ UNKNOWN = 0,
+ NHWC,
+ NCHW
+};
+
+} // namespace ir
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_IR_LAYOUT_H__
*/
#include "Server.h"
-
-#include <util/Logging.h>
+#include "util/Logging.h"
using namespace npud;
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Configuration entry list.
+// The including file must define CONFIG(Name, Type, Default) first; each line
+// below then expands through that macro.
+#ifndef CONFIG
+#error Define CONFIG before including this file
+#endif
+
+// Defaults are written as string literals for every type.
+// Name | Type | Default
+CONFIG(NPUD_LOG_ENABLE , bool , "0")
+CONFIG(DEVICE_MODULE_PATH , std::string , "/usr/lib/npud/devices")
--- /dev/null
+<!DOCTYPE busconfig PUBLIC "-//freedesktop//DTD D-BUS Bus Configuration 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/busconfig.dtd">
+<!--
+  Bus policy for the npud service.
+  The DOCTYPE names the bus configuration DTD because the root element is
+  busconfig; the introspection DTD describes node documents instead.
+-->
+<busconfig>
+  <!-- Default policy: allows owning the org.tizen.npud name and exchanging
+       messages with it. -->
+  <policy context="default">
+    <allow own="org.tizen.npud"/>
+    <allow send_destination="org.tizen.npud"/>
+    <allow receive_sender="org.tizen.npud"/>
+  </policy>
+</busconfig>
--- /dev/null
+<!DOCTYPE node PUBLIC "-//freedesktop//DTD D-BUS Object Introspection 1.0//EN"
+ "http://www.freedesktop.org/standards/dbus/1.0/introspect.dtd">
+<node name="/">
+ <!-- org.tizen.npud.core:
+ @short_description: Npud interface
+
+ The interface used to run AI models on npu devices.
+ -->
+ <interface name="org.tizen.npud.core">
+ <!--
+ device_get_available_list:
+ @error: The error status of the function.
+
+ Get all available npu device lists.
+ -->
+ <method name="device_get_available_list">
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ context_create:
+ @device_id: The device number to use.
+ @priority: The device priority.
+ @ctx: The Context handle.
+ @error: The error status of the function.
+
+ Create context.
+ -->
+ <method name="context_create">
+ <arg name="device_id" type="i" direction="in" />
+ <arg name="priority" type="i" direction="in" />
+ <arg name="ctx" type="t" direction="out" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ context_destroy:
+ @ctx: The Context handle to destroy.
+ @error: The error status of the function.
+
+ Destroy context.
+ -->
+ <method name="context_destroy">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ buffers_create:
+ @ctx: The Context handle.
+ @buffers: The array of buffer structure. (i:type, t:address, u:size)
+ @out_buffers: The array of buffer structure containing created buffer address.
+ @error: The error status of the function.
+
+ Create buffer array.
+ -->
+ <method name="buffers_create">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="buffers" type="a(itu)" direction="in" />
+ <arg name="out_buffers" type="a(itu)" direction="out" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ buffers_destroy:
+ @ctx: The Context handle.
+ @buffers: The array of buffer structure. (i:type, t:address, u:size)
+ @error: The error status of the function.
+
+ Destroy buffer array.
+ -->
+ <method name="buffers_destroy">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="buffers" type="a(itu)" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ network_create:
+ @ctx: The context handle.
+ @model_path: The model path to run.
+ @nw_handle: The Network handle.
+ @error: The error status of the function.
+
+ Create network.
+
+ TODO Support file descriptor input
+ -->
+ <method name="network_create">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="model_path" type="s" direction="in" />
+ <arg name="nw_handle" type="u" direction="out" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ network_destroy:
+ @ctx: The context handle.
+ @nw_handle: The Network handle.
+ @error: The error status of the function.
+
+ Destroy network.
+ -->
+ <method name="network_destroy">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="nw_handle" type="u" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ request_create:
+ @ctx: The context handle.
+ @nw_handle: The Network handle.
+ @rq_handle: The Request handle.
+ @error: The error status of the function.
+
+ Create request.
+ -->
+ <method name="request_create">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="nw_handle" type="u" direction="in" />
+ <arg name="rq_handle" type="u" direction="out" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ request_destroy:
+ @ctx: The context handle.
+ @rq_handle: The Request handle.
+ @error: The error status of the function.
+
+ Destroy request.
+ -->
+ <method name="request_destroy">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="rq_handle" type="u" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ request_set_data:
+ @ctx: The context handle.
+ @rq_handle: The Request handle.
+ @input_buffers: The input buffer data.
+ @output_buffers: The output buffer data.
+ @error: The error status of the function.
+
+ Set request data.
+ -->
+ <method name="request_set_data">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="rq_handle" type="u" direction="in" />
+ <arg name="input_buffers" type="a(itu)" direction="in" />
+ <arg name="output_buffers" type="a(itu)" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ <!--
+ execute_run:
+ @ctx: The context handle.
+ @rq_handle: The Request handle.
+ @error: The error status of the function.
+
+ Execute run.
+ -->
+ <method name="execute_run">
+ <arg name="ctx" type="t" direction="in" />
+ <arg name="rq_handle" type="u" direction="in" />
+ <arg name="error" type="i" direction="out" />
+ </method>
+ </interface>
+</node>
--- /dev/null
+# Build the npud gtest executable only when tests are enabled.
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+# Collect every .cc file below this directory as a test source.
+file(GLOB_RECURSE TESTS "*.cc")
+
+add_executable(npud_gtest ${TESTS})
+
+set_target_properties(npud_gtest PROPERTIES LINKER_LANGUAGE CXX)
+# Headers: npud sources and GLib.
+target_include_directories(npud_gtest PUBLIC ${NPUD_INCLUDE_DIRS})
+target_include_directories(npud_gtest PUBLIC ${GLIB2.0_INCLUDE_DIRS})
+# Libraries: GLib, pthread, the npud core library, gtest's main and libdl.
+target_link_libraries(npud_gtest PRIVATE ${GLIB2.0_LIBRARIES})
+target_link_libraries(npud_gtest PRIVATE ${LIB_PTHREAD})
+target_link_libraries(npud_gtest PRIVATE npud_core)
+target_link_libraries(npud_gtest PRIVATE gtest_main dl)
+
+# Install the test binary under npud-gtest.
+install(TARGETS npud_gtest DESTINATION npud-gtest)
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <core/Server.h>
+#include <gtest/gtest.h>
+#include <thread>
+#include <gio/gio.h>
+#include <dbus-core.h>
+#include <unistd.h>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <string>
+
+namespace
+{
+using namespace npud;
+using namespace core;
+
+//
+// DBusTest setup/teardown
+//
+// Fixture that runs the npud Server on a detached thread for each test and
+// stops it afterwards.
+class DBusTest : public ::testing::Test
+{
+protected:
+  // Thread entry point: runs the server singleton.
+  static void runTask()
+  {
+    auto &server = Server::instance();
+    server.run();
+  }
+
+  // Starts the server thread and waits until the service reports ready.
+  void SetUp() override
+  {
+    std::thread child = std::thread(runTask);
+    child.detach();
+    auto &server = Server::instance();
+    while (server.isServiceReady() != true)
+    {
+      // Give the server thread a chance to run instead of spinning flat out.
+      std::this_thread::yield();
+    }
+  }
+
+  // Stops the server if it is still running.
+  void TearDown() override
+  {
+    auto &server = Server::instance();
+    if (server.isRunning())
+    {
+      server.stop();
+    }
+  }
+
+  // Creates a synchronous proxy for org.tizen.npud on the system bus.
+  // Returns nullptr on failure; the caller owns the returned object.
+  NpudCore *getProxy()
+  {
+    GError *error = nullptr;
+    NpudCore *proxy = nullptr;
+    proxy = npud_core_proxy_new_for_bus_sync(G_BUS_TYPE_SYSTEM, G_DBUS_PROXY_FLAGS_NONE,
+                                             "org.tizen.npud", "/org/tizen/npud", NULL, &error);
+    if (error)
+    {
+      g_error_free(error);
+    }
+    return proxy;
+  }
+
+  // Returns the path of the test model below $GTEST_MODEL_PATH, or an empty
+  // string when the variable is unset or the file does not exist.
+  const std::string &getModel()
+  {
+    if (model.empty())
+    {
+      // getenv() returns a null pointer when the variable is unset;
+      // concatenating it with a std::string would be undefined behaviour.
+      const char *model_path = std::getenv("GTEST_MODEL_PATH");
+      if (model_path == nullptr)
+      {
+        return model;
+      }
+      model = model_path + std::string("/mv1.q8/mv1.q8.tvn");
+    }
+    if (access(model.c_str(), F_OK) != 0)
+    {
+      model.clear();
+    }
+    return model;
+  }
+
+private:
+  // Cached model path; empty until resolved or when the model is missing.
+  std::string model;
+};
+
+//
+// DBusTest
+//
+TEST_F(DBusTest, get_proxy)
+{
+  // With the server started by the fixture, a proxy for the npud D-Bus object must be obtainable
+  ASSERT_NE(this->getProxy(), nullptr);
+}
+
+TEST_F(DBusTest, device_get_available_list)
+{
+  NpudCore *core = this->getProxy();
+  ASSERT_NE(core, nullptr);
+
+  // status keeps its -1 sentinel unless the call stores a result in it
+  gint status = -1;
+  GError *gerr = NULL;
+  npud_core_call_device_get_available_list_sync(core, &status, NULL, &gerr);
+  // Transport-level error details are not inspected, only released
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+}
+
+TEST_F(DBusTest, context_create)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  // Initialised like in the sibling tests so no indeterminate value is ever handed around
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  // Error details are not inspected; g_clear_error() frees the error and resets the pointer
+  g_clear_error(&error);
+  // out_error only leaves its -1 sentinel when the call reported a result
+  ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, context_destroy)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  // g_clear_error() frees the error AND resets the pointer to NULL, which is required before
+  // the same variable is passed to the next call (otherwise it would dangle and be freed twice)
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+
+  // Re-arm the sentinel: without this a destroy call that never writes out_error would pass on
+  // the 0 left behind by the create call
+  out_error = -1;
+  npud_core_call_context_destroy_sync(proxy, out_ctx, &out_error, NULL, &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_context_destroy_invalid_ctx)
+{
+  NpudCore *core = this->getProxy();
+  ASSERT_NE(core, nullptr);
+
+  // No context is created beforehand; 0 is used as the invalid context handle
+  guint64 ctx = 0;
+  gint status = -1;
+  GError *gerr = NULL;
+  npud_core_call_context_destroy_sync(core, ctx, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  // NOTE(review): this also holds when status simply keeps its -1 sentinel
+  ASSERT_NE(status, 0);
+}
+
+TEST_F(DBusTest, network_create)
+{
+  NpudCore *core = this->getProxy();
+  ASSERT_NE(core, nullptr);
+
+  // Step 1: create the context the network will belong to
+  const gint device_id = 0;
+  const gint priority = 0;
+  guint64 ctx = 0;
+  gint status = -1;
+  GError *gerr = NULL;
+  npud_core_call_context_create_sync(core, device_id, priority, &ctx, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+
+  // Step 2: create a network from the test model (the path is empty if the model is missing)
+  status = -1;
+  guint nw_handle = 0;
+  const gchar *model_file = this->getModel().c_str();
+  npud_core_call_network_create_sync(core, ctx, model_file, &nw_handle, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+}
+
+TEST_F(DBusTest, neg_network_create_invalid_ctx)
+{
+  NpudCore *core = this->getProxy();
+  ASSERT_NE(core, nullptr);
+
+  // -1 converted to guint64 serves as the invalid context handle; no context is created
+  guint64 ctx = -1;
+  gint status = -1;
+  guint nw_handle = 0;
+  GError *gerr = NULL;
+  const gchar *model_file = this->getModel().c_str();
+  npud_core_call_network_create_sync(core, ctx, model_file, &nw_handle, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  // NOTE(review): this also holds when status simply keeps its -1 sentinel
+  ASSERT_NE(status, 0);
+}
+
+TEST_F(DBusTest, neg_network_create_invalid_model)
+{
+  NpudCore *core = this->getProxy();
+  ASSERT_NE(core, nullptr);
+
+  // A valid context first, so that only the model path is wrong in the call under test
+  const gint device_id = 0;
+  const gint priority = 0;
+  guint64 ctx = 0;
+  gint status = -1;
+  GError *gerr = NULL;
+  npud_core_call_context_create_sync(core, device_id, priority, &ctx, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+
+  // Invalid model
+  status = -1;
+  guint nw_handle = 0;
+  const gchar *model_file = "invalid.tvn";
+  npud_core_call_network_create_sync(core, ctx, model_file, &nw_handle, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  // NOTE(review): this also holds when status simply keeps its -1 sentinel
+  ASSERT_NE(status, 0);
+}
+
+TEST_F(DBusTest, network_destroy)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  // g_clear_error() is used after every call: it frees the error and resets the pointer to
+  // NULL, so the variable never dangles when it is handed to the following call
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+
+  // out_error is re-armed to -1 before each call so a stale 0 cannot satisfy the assertion
+  out_error = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  npud_core_call_network_destroy_sync(proxy, out_ctx, out_nw_handle, &out_error, NULL, &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_network_destroy_invalid_ctx)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  // g_clear_error() is used after every call: it frees the error and resets the pointer to
+  // NULL, so the variable never dangles when it is handed to the following call
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  // Invalid ctx
+  out_ctx = -1;
+  npud_core_call_network_destroy_sync(proxy, out_ctx, out_nw_handle, &out_error, NULL, &error);
+  g_clear_error(&error);
+  // NOTE(review): this also holds when out_error simply keeps its -1 sentinel
+  ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_network_destroy_invalid_nw_handle)
+{
+  NpudCore *core = this->getProxy();
+  ASSERT_NE(core, nullptr);
+
+  // A valid context first, so that only the network handle is wrong in the call under test
+  const gint device_id = 0;
+  const gint priority = 0;
+  guint64 ctx = 0;
+  gint status = -1;
+  GError *gerr = NULL;
+  npud_core_call_context_create_sync(core, device_id, priority, &ctx, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+
+  // -1 converted to guint serves as the invalid network handle; no network is created
+  status = -1;
+  guint nw_handle = -1;
+  npud_core_call_network_destroy_sync(core, ctx, nw_handle, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  // NOTE(review): this also holds when status simply keeps its -1 sentinel
+  ASSERT_NE(status, 0);
+}
+
+TEST_F(DBusTest, request_create)
+{
+  NpudCore *core = this->getProxy();
+  ASSERT_NE(core, nullptr);
+
+  // Step 1: context
+  const gint device_id = 0;
+  const gint priority = 0;
+  guint64 ctx = 0;
+  gint status = -1;
+  GError *gerr = NULL;
+  npud_core_call_context_create_sync(core, device_id, priority, &ctx, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+
+  // Step 2: network built from the test model
+  status = -1;
+  guint nw_handle = 0;
+  const gchar *model_file = this->getModel().c_str();
+  npud_core_call_network_create_sync(core, ctx, model_file, &nw_handle, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+
+  // Step 3: request on that network
+  status = -1;
+  guint rq_handle = 0;
+  npud_core_call_request_create_sync(core, ctx, nw_handle, &rq_handle, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+}
+
+TEST_F(DBusTest, neg_request_create_invalid_ctx)
+{
+  NpudCore *core = this->getProxy();
+  ASSERT_NE(core, nullptr);
+
+  // Valid context and network first, so that only the context argument of the final call is bad
+  const gint device_id = 0;
+  const gint priority = 0;
+  guint64 ctx = 0;
+  gint status = -1;
+  GError *gerr = NULL;
+  npud_core_call_context_create_sync(core, device_id, priority, &ctx, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+
+  status = -1;
+  guint nw_handle = 0;
+  const gchar *model_file = this->getModel().c_str();
+  npud_core_call_network_create_sync(core, ctx, model_file, &nw_handle, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+
+  // The literal 0 is passed as the (invalid) context instead of ctx
+  status = -1;
+  guint rq_handle = 0;
+  npud_core_call_request_create_sync(core, 0, nw_handle, &rq_handle, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  // NOTE(review): this also holds when status simply keeps its -1 sentinel
+  ASSERT_NE(status, 0);
+}
+
+TEST_F(DBusTest, neg_request_create_invalid_nw)
+{
+  NpudCore *core = this->getProxy();
+  ASSERT_NE(core, nullptr);
+
+  // A valid context first, so that only the network handle is wrong in the call under test
+  const gint device_id = 0;
+  const gint priority = 0;
+  guint64 ctx = 0;
+  gint status = -1;
+  GError *gerr = NULL;
+  npud_core_call_context_create_sync(core, device_id, priority, &ctx, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+
+  // No network is created; the literal 0 is passed as the (invalid) network handle
+  status = -1;
+  guint rq_handle = 0;
+  npud_core_call_request_create_sync(core, ctx, 0, &rq_handle, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  // NOTE(review): this also holds when status simply keeps its -1 sentinel
+  ASSERT_NE(status, 0);
+}
+
+TEST_F(DBusTest, request_destroy)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  // g_clear_error() is used after every call: it frees the error and resets the pointer to
+  // NULL, so the variable never dangles when it is handed to the following call
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+
+  // out_error is re-armed to -1 before each call so a stale 0 cannot satisfy the assertion
+  out_error = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  guint out_rq_handle = 0;
+  npud_core_call_request_create_sync(proxy, out_ctx, out_nw_handle, &out_rq_handle, &out_error,
+                                     NULL, &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  npud_core_call_request_destroy_sync(proxy, out_ctx, out_rq_handle, &out_error, NULL, &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_request_destroy_invalid_ctx)
+{
+  NpudCore *proxy = this->getProxy();
+  ASSERT_NE(proxy, nullptr);
+
+  // g_clear_error() is used after every call: it frees the error and resets the pointer to
+  // NULL, so the variable never dangles when it is handed to the following call
+  GError *error = NULL;
+  gint out_error = -1;
+  gint arg_device_id = 0;
+  gint arg_priority = 0;
+  guint64 out_ctx = 0;
+  npud_core_call_context_create_sync(proxy, arg_device_id, arg_priority, &out_ctx, &out_error, NULL,
+                                     &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  const gchar *model_path = this->getModel().c_str();
+  guint out_nw_handle = 0;
+  npud_core_call_network_create_sync(proxy, out_ctx, model_path, &out_nw_handle, &out_error, NULL,
+                                     &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+
+  out_error = -1;
+  guint out_rq_handle = 0;
+  npud_core_call_request_create_sync(proxy, out_ctx, out_nw_handle, &out_rq_handle, &out_error,
+                                     NULL, &error);
+  g_clear_error(&error);
+  ASSERT_EQ(out_error, 0);
+
+  // The literal 0 is passed as the (invalid) context instead of out_ctx
+  out_error = -1;
+  npud_core_call_request_destroy_sync(proxy, 0, out_rq_handle, &out_error, NULL, &error);
+  g_clear_error(&error);
+  // NOTE(review): this also holds when out_error simply keeps its -1 sentinel
+  ASSERT_NE(out_error, 0);
+}
+
+TEST_F(DBusTest, neg_request_destroy_invalid_rq)
+{
+  NpudCore *core = this->getProxy();
+  ASSERT_NE(core, nullptr);
+
+  // A valid context first, so that only the request handle is wrong in the call under test
+  const gint device_id = 0;
+  const gint priority = 0;
+  guint64 ctx = 0;
+  gint status = -1;
+  GError *gerr = NULL;
+  npud_core_call_context_create_sync(core, device_id, priority, &ctx, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  ASSERT_EQ(status, 0);
+
+  // No request is created; the literal 0 is passed as the (invalid) request handle
+  status = -1;
+  npud_core_call_request_destroy_sync(core, ctx, 0, &status, NULL, &gerr);
+  g_clear_error(&gerr);
+  // NOTE(review): this also holds when status simply keeps its -1 sentinel
+  ASSERT_NE(status, 0);
+}
+
+} // unnamed namespace
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/Server.h"
+
+#include <gtest/gtest.h>
+#include <thread>
+
+namespace
+{
+using namespace npud;
+using namespace core;
+
+//
+// ServerTest setup/teardown
+//
+// Test fixture: runs the Server singleton on a detached thread for every test case.
+class ServerTest : public ::testing::Test
+{
+protected:
+  // Entry point of the background thread; returns when Server::run() returns.
+  static void runTask()
+  {
+    auto &server = Server::instance();
+    server.run();
+  }
+
+  // Starts the server thread and blocks until the server reports that it is running.
+  void SetUp() override
+  {
+    std::thread child = std::thread(runTask);
+    child.detach();
+    auto &server = Server::instance();
+    while (server.isRunning() != true)
+    {
+      // Give the CPU away while polling so the wait does not compete with the server thread
+      std::this_thread::yield();
+    }
+  }
+
+  // Stops the server unless the test body already did so.
+  void TearDown() override
+  {
+    auto &server = Server::instance();
+    if (server.isRunning())
+    {
+      server.stop();
+    }
+  }
+};
+
+//
+// ServerTest
+//
+TEST_F(ServerTest, run)
+{
+  // SetUp() has already started the server, so it must report the running state here
+  ASSERT_EQ(Server::instance().isRunning(), true);
+}
+
+TEST_F(ServerTest, stop)
+{
+  // Stop the server started by SetUp() and check that the state flips immediately
+  Server &instance = Server::instance();
+  instance.stop();
+  ASSERT_EQ(instance.isRunning(), false);
+}
+
+} // unnamed namespace
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/Server.h"
+#include "core/Signal.h"
+
+#include <gtest/gtest.h>
+#include <thread>
+#include <csignal>
+
+namespace
+{
+using namespace npud;
+using namespace core;
+
+//
+// SignalTest setup/teardown
+//
+// Test fixture: runs the Server singleton on a detached thread for every test case.
+// NOTE(review): the tests presumably rely on signal handlers declared in core/Signal.h being
+// installed by the server - confirm, nothing in this file installs them.
+class SignalTest : public ::testing::Test
+{
+protected:
+  // Entry point of the background thread; returns when Server::run() returns.
+  static void runTask()
+  {
+    auto &server = Server::instance();
+    server.run();
+  }
+
+  // Starts the server thread and blocks until the server reports that it is running.
+  void SetUp() override
+  {
+    std::thread child = std::thread(runTask);
+    child.detach();
+    auto &server = Server::instance();
+    while (server.isRunning() != true)
+    {
+      // Give the CPU away while polling so the wait does not compete with the server thread
+      std::this_thread::yield();
+    }
+  }
+
+  // Stops the server if the raised signal did not already stop it.
+  void TearDown() override
+  {
+    auto &server = Server::instance();
+    if (server.isRunning())
+    {
+      server.stop();
+    }
+  }
+};
+
+//
+// SignalTest
+//
+TEST_F(SignalTest, raise_SIGTERM)
+{
+  // Precondition: the server started by SetUp() is running
+  ASSERT_EQ(Server::instance().isRunning(), true);
+  // Expectation: once SIGTERM has been raised the server no longer reports running
+  std::raise(SIGTERM);
+  ASSERT_EQ(Server::instance().isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGINT)
+{
+  // Precondition: the server started by SetUp() is running
+  ASSERT_EQ(Server::instance().isRunning(), true);
+  // Expectation: once SIGINT has been raised the server no longer reports running
+  std::raise(SIGINT);
+  ASSERT_EQ(Server::instance().isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGILL)
+{
+  // Precondition: the server started by SetUp() is running
+  ASSERT_EQ(Server::instance().isRunning(), true);
+  // Expectation: once SIGILL has been raised the server no longer reports running
+  std::raise(SIGILL);
+  ASSERT_EQ(Server::instance().isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGABRT)
+{
+  // Precondition: the server started by SetUp() is running
+  ASSERT_EQ(Server::instance().isRunning(), true);
+  // Expectation: once SIGABRT has been raised the server no longer reports running
+  std::raise(SIGABRT);
+  ASSERT_EQ(Server::instance().isRunning(), false);
+}
+
+TEST_F(SignalTest, raise_SIGFPE)
+{
+  // Precondition: the server started by SetUp() is running
+  ASSERT_EQ(Server::instance().isRunning(), true);
+  // Expectation: once SIGFPE has been raised the server no longer reports running
+  std::raise(SIGFPE);
+  ASSERT_EQ(Server::instance().isRunning(), false);
+}
+
+} // unnamed namespace
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef CONFIG
-#error Define CONFIG before including this file
-#endif
-
-// Name | Type | Default
-CONFIG(NPUD_LOG_ENABLE , bool , "0")
return()
endif(NOT BUILD_ONERT)
-if (ANDROID_BOOST_ROOT)
- set(BOOST_ROOT ${ANDROID_BOOST_ROOT})
-endif (ANDROID_BOOST_ROOT)
-
-nnfw_find_package(Boost REQUIRED)
nnfw_find_package(GTest)
+# NNAPI gtest requires c++17
+set(CMAKE_CXX_STANDARD 17)
set(GENERATED_CPPS "${CMAKE_CURRENT_SOURCE_DIR}/src/generated/all_generated_V1_2_cts_tests.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/generated/all_generated_V1_1_cts_tests.cpp"
set(RUNTIME_NNAPI_TEST_SRC_INC ${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_SOURCE_DIR}/src)
target_include_directories(${RUNTIME_NNAPI_TEST} PRIVATE ${RUNTIME_NNAPI_TEST_SRC_INC})
-target_include_directories(${RUNTIME_NNAPI_TEST} PRIVATE ${Boost_INCLUDE_DIRS})
# Define NNTEST_ONLY_PUBLIC_API to avoid android dependency
target_compile_definitions(${RUNTIME_NNAPI_TEST} PRIVATE NNTEST_ONLY_PUBLIC_API)
target_link_libraries(${RUNTIME_NNAPI_TEST} gtest gmock)
target_link_libraries(${RUNTIME_NNAPI_TEST} ${LIB_PTHREAD} dl)
-install(TARGETS ${RUNTIME_NNAPI_TEST} DESTINATION unittest)
+install(TARGETS ${RUNTIME_NNAPI_TEST} DESTINATION nnapi-gtest)
# Default test backend: cpu
set(SKIPLIST_FILE_NAME ${RUNTIME_NNAPI_TEST}.skip.${TARGET_PLATFORM}.cpu)
install(FILES ${SKIPLIST_FILE_NAME}
- DESTINATION unittest
+ DESTINATION nnapi-gtest
RENAME ${RUNTIME_NNAPI_TEST}.skip
OPTIONAL)
# Install skiplist file for target as backup
FILE(GLOB SKIPLIST_TARGET ${CMAKE_CURRENT_SOURCE_DIR}/${RUNTIME_NNAPI_TEST}.skip.${TARGET_PLATFORM}*)
-FILE(GLOB SKIPLIST_NOARCH ${CMAKE_CURRENT_SOURCE_DIR}/${RUNTIME_NNAPI_TEST}.skip.noarch.*)
-list(APPEND SKIPLIST_ALL_RUNTIME ${SKIPLIST_TARGET} ${SKIPLIST_NOARCH})
-install(FILES ${SKIPLIST_ALL_RUNTIME} DESTINATION unittest OPTIONAL)
+install(FILES ${SKIPLIST_TARGET} DESTINATION nnapi-gtest OPTIONAL)
#include "NeuralNetworksExShim.h"
#include <math.h>
-// Fix for onert: use boost::optional instead of std::optional
-// TODO in onert: introduce and use internal optional library
-#include <boost/optional.hpp>
+#include <optional>
#include <string>
#include <vector>
struct OperandType {
ANeuralNetworksOperandType operandType;
std::vector<uint32_t> dimensions;
- // Fix for onert:
- // Use boost::optional instead of std::optional
- // Default value: std::nullopt -> boost::none
- boost::optional<SymmPerChannelQuantParams> channelQuant;
+ std::optional<SymmPerChannelQuantParams> channelQuant;
OperandType(const OperandType& other)
: operandType(other.operandType),
}
OperandType(Type type, std::vector<uint32_t> d, float scale = 0.0f, int32_t zeroPoint = 0)
- : dimensions(std::move(d)), channelQuant(boost::none) {
+ : dimensions(std::move(d)), channelQuant(std::nullopt) {
operandType = {
.type = static_cast<int32_t>(type),
.dimensionCount = static_cast<uint32_t>(dimensions.size()),
+++ /dev/null
-GeneratedTests.abs_
-GeneratedTests.abs_1D_float_nnfw
-GeneratedTests.abs_2D_float_nnfw
-GeneratedTests.abs_3D_float_nnfw
-GeneratedTests.abs_4D_float_nnfw
-GeneratedTests.abs_dynamic_nnfw
-GeneratedTests.add_broadcast_quant8
-GeneratedTests.add_dynamic_nnfw
-GeneratedTests.add_quant8
-GeneratedTests.argmax_1
-GeneratedTests.argmax_1_quant8
-GeneratedTests.argmax_2
-GeneratedTests.argmax_2_quant8
-GeneratedTests.argmax_3
-GeneratedTests.argmax_3_axis_as_input_nnfw
-GeneratedTests.argmax_3_axis_as_input_nnfw_quant8
-GeneratedTests.argmax_3_quant8
-GeneratedTests.argmax_dynamic_nnfw
-GeneratedTests.argmax_float_1_nnfw
-GeneratedTests.argmax_float_2_nnfw
-GeneratedTests.argmax_int32_nnfw
-GeneratedTests.argmax_neg_axis_float_nnfw
-GeneratedTests.argmax_neg_axis_int32_nnfw
-GeneratedTests.argmax_quant8_neg_axis_nnfw
-GeneratedTests.argmax_quant8_nnfw
-GeneratedTests.argmin_1
-GeneratedTests.argmin_1_quant8
-GeneratedTests.argmin_2
-GeneratedTests.argmin_2_quant8
-GeneratedTests.argmin_3
-GeneratedTests.argmin_3_quant8
-GeneratedTests.avg_pool_quant8_1
-GeneratedTests.avg_pool_quant8_2
-GeneratedTests.avg_pool_quant8_3
-GeneratedTests.avg_pool_quant8_4
-GeneratedTests.avg_pool_quant8_5
-GeneratedTests.batch_matmul_ex_dynamic_nnfw
-GeneratedTests.batch_matmul_ex_float_adj_x
-GeneratedTests.batch_matmul_ex_float_adj_y
-GeneratedTests.batch_matmul_ex_float_batch2
-GeneratedTests.batch_matmul_ex_float_broadcast
-GeneratedTests.batch_matmul_ex_float_broadcast2_adj_xy
-GeneratedTests.batch_matmul_ex_float_broadcast_adj_x
-GeneratedTests.batch_matmul_ex_float_simple
-GeneratedTests.batch_to_space
-GeneratedTests.batch_to_space_float_1
-GeneratedTests.batch_to_space_quant8_1
-GeneratedTests.broadcast_to_ex_1D_nnfw
-GeneratedTests.broadcast_to_ex_2D_nnfw
-GeneratedTests.broadcast_to_ex_dynamic_2D_nnfw
-GeneratedTests.broadcast_to_ex_dynamic_3D_nnfw
-GeneratedTests.cast_dynamic_float32_to_int32_nnfw
-GeneratedTests.cast_float16_to_float16
-GeneratedTests.cast_float16_to_float32
-GeneratedTests.cast_float16_to_float32_relaxed
-GeneratedTests.cast_float16_to_int32
-GeneratedTests.cast_float16_to_quant8
-GeneratedTests.cast_float16_to_quant8_overflow
-GeneratedTests.cast_float32_to_float16
-GeneratedTests.cast_float32_to_float16_relaxed
-GeneratedTests.cast_float32_to_float32
-GeneratedTests.cast_float32_to_float32_relaxed
-GeneratedTests.cast_float32_to_int32
-GeneratedTests.cast_float32_to_int32_nnfw
-GeneratedTests.cast_float32_to_int32_relaxed
-GeneratedTests.cast_float32_to_quant8
-GeneratedTests.cast_float32_to_quant8_overflow
-GeneratedTests.cast_float32_to_quant8_overflow_relaxed
-GeneratedTests.cast_float32_to_quant8_relaxed
-GeneratedTests.cast_int32_to_float16
-GeneratedTests.cast_int32_to_float32
-GeneratedTests.cast_int32_to_float32_nnfw
-GeneratedTests.cast_int32_to_float32_relaxed
-GeneratedTests.cast_int32_to_int32
-GeneratedTests.cast_int32_to_quant8
-GeneratedTests.cast_int32_to_quant8_overflow
-GeneratedTests.cast_quant8_to_float16
-GeneratedTests.cast_quant8_to_float32
-GeneratedTests.cast_quant8_to_float32_relaxed
-GeneratedTests.cast_quant8_to_int32
-GeneratedTests.cast_quant8_to_quant8
-GeneratedTests.concat_dynamic_nnfw
-GeneratedTests.concat_quant8_1
-GeneratedTests.concat_quant8_2
-GeneratedTests.concat_quant8_3
-GeneratedTests.conv_dynamic_nnfw
-GeneratedTests.conv_quant8
-GeneratedTests.conv_quant8_2
-GeneratedTests.conv_quant8_channels
-GeneratedTests.conv_quant8_channels_weights_as_inputs
-GeneratedTests.conv_quant8_large
-GeneratedTests.conv_quant8_large_weights_as_inputs
-GeneratedTests.conv_quant8_overflow
-GeneratedTests.conv_quant8_overflow_weights_as_inputs
-GeneratedTests.conv_quant8_weights_as_inputs
-GeneratedTests.conv2d_dilation_nnfw
-GeneratedTests.conv2d_dilation_nnfw_quant8
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
-GeneratedTests.conv2d_dilation_nnfw_2
-GeneratedTests.conv2d_dilation_nnfw_quant8_2
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
-GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
-GeneratedTests.cos_ex_1D_float_nnfw
-GeneratedTests.cos_ex_4D_float_nnfw
-GeneratedTests.cos_ex_dynamic_nnfw
-GeneratedTests.depth_to_space_float_1
-GeneratedTests.depth_to_space_float_2
-GeneratedTests.depth_to_space_float_3
-GeneratedTests.depth_to_space_quant8_1
-GeneratedTests.depth_to_space_quant8_2
-GeneratedTests.depthwise_conv2d_quant8
-GeneratedTests.depthwise_conv2d_quant8_2
-GeneratedTests.depthwise_conv2d_quant8_large
-GeneratedTests.depthwise_conv2d_quant8_large_weights_as_inputs
-GeneratedTests.depthwise_conv2d_quant8_weights_as_inputs
-GeneratedTests.dequantize
-GeneratedTests.dequantize_v1_2_1d_quant8_asymm
-GeneratedTests.dequantize_v1_2_2d_quant8_asymm
-GeneratedTests.dequantize_v1_2_3d_quant8_symm
-GeneratedTests.dequantize_v1_2_4d_quant8_symm
-GeneratedTests.dequantize_v1_2_3d_per_channel_first_dim
-GeneratedTests.dequantize_v1_2_3d_per_channel_second_dim
-GeneratedTests.dequantize_v1_2
-GeneratedTests.dequantize_v1_2_zero_sized
-GeneratedTests.dequantize_v1_2_zero_sized_float16
-GeneratedTests.div_
-GeneratedTests.div_broadcast_float
-GeneratedTests.div_broadcast_float_4D_2D_nnfw
-GeneratedTests.div_dynamic_nnfw
-GeneratedTests.einsum_ex_float_matmul_2x2_2
-GeneratedTests.einsum_ex_float_matmul_3x2_3
-GeneratedTests.einsum_ex_float_matmul_3x3_4
-GeneratedTests.einsum_ex_float_matmul_4x4_4
-GeneratedTests.einsum_ex_float_matmul_4x4_4_2
-GeneratedTests.embedding_lookup
-GeneratedTests.embedding_lookup_2d_nnfw
-GeneratedTests.embedding_lookup_4d_nnfw
-GeneratedTests.equal_1D_float_nnfw
-GeneratedTests.equal_4D_float_nnfw
-GeneratedTests.equal_boolean
-GeneratedTests.equal_broadcast
-GeneratedTests.equal_broadcast_4D_2D_float_nnfw
-GeneratedTests.equal_broadcast_float_nnfw
-GeneratedTests.equal_broadcast_quant8_nnfw
-GeneratedTests.equal_dynamic_float_nnfw
-GeneratedTests.equal_quant8_nnfw
-GeneratedTests.equal_quantized_different_scale
-GeneratedTests.equal_quantized_different_zero_point
-GeneratedTests.equal_quantized_overflow_first_input_if_requantized
-GeneratedTests.equal_quantized_overflow_second_input_if_requantized
-GeneratedTests.equal_simple
-GeneratedTests.exp_
-GeneratedTests.exp_1D_float_nnfw
-GeneratedTests.exp_2D_float_nnfw
-GeneratedTests.exp_3D_float_nnfw
-GeneratedTests.exp_4D_float_nnfw
-GeneratedTests.exp_dynamic_nnfw
-GeneratedTests.expand_dims
-GeneratedTests.expand_dims_2
-GeneratedTests.expand_dims_3
-GeneratedTests.expand_dims_4
-GeneratedTests.expand_dims_dynamic_nnfw_1
-GeneratedTests.expand_dims_dynamic_nnfw_2
-GeneratedTests.expand_dims_int32
-GeneratedTests.expand_dims_int32_2
-GeneratedTests.expand_dims_int32_3
-GeneratedTests.expand_dims_int32_4
-GeneratedTests.expand_dims_quant8
-GeneratedTests.expand_dims_quant8_2
-GeneratedTests.expand_dims_quant8_3
-GeneratedTests.expand_dims_quant8_4
-GeneratedTests.fill_ex_1D_float
-GeneratedTests.fill_ex_4D_float
-GeneratedTests.fill_ex_dynamic_nnfw
-GeneratedTests.floor_
-GeneratedTests.fully_connected_dynamic_nnfw
-GeneratedTests.fully_connected_hybrid_1_nnfw
-GeneratedTests.fully_connected_hybrid_2_nnfw
-GeneratedTests.fully_connected_quant8
-GeneratedTests.fully_connected_quant8_2
-GeneratedTests.fully_connected_quant8_large
-GeneratedTests.fully_connected_quant8_large_weights_as_inputs
-GeneratedTests.fully_connected_quant8_weights_as_inputs
-GeneratedTests.fusedbatchnorm_ex_dynamic_nnfw
-GeneratedTests.fusedbatchnorm_ex_float_fusedbatchnorm_1141
-GeneratedTests.gather_dynamic_nnfw
-GeneratedTests.gather_float16
-GeneratedTests.gather_float16_2
-GeneratedTests.gather_float16_3
-GeneratedTests.gather_float16_4
-GeneratedTests.gather_float16_5
-GeneratedTests.gather_float16_6
-GeneratedTests.gather_float16_7
-GeneratedTests.gather_float16_8
-GeneratedTests.greater_broadcast_quant8_nnfw
-GeneratedTests.greater_dynamic_float_nnfw
-GeneratedTests.greater_equal_boolean
-GeneratedTests.greater_equal_broadcast
-GeneratedTests.greater_equal_broadcast_quant8_nnfw
-GeneratedTests.greater_equal_dynamic_float_nnfw
-GeneratedTests.greater_equal_nnfw
-GeneratedTests.greater_equal_quant8_nnfw
-GeneratedTests.greater_equal_quantized_different_scale
-GeneratedTests.greater_equal_quantized_different_zero_point
-GeneratedTests.greater_equal_quantized_overflow_first_input_if_requantized
-GeneratedTests.greater_equal_quantized_overflow_second_input_if_requantized
-GeneratedTests.greater_equal_simple
-GeneratedTests.greater_quant8_nnfw
-GeneratedTests.hashtable_lookup_float
-GeneratedTests.hashtable_lookup_float_4D_nnfw
-GeneratedTests.hashtable_lookup_quant8
-GeneratedTests.l2_normalization
-GeneratedTests.l2_normalization_2
-GeneratedTests.l2_normalization_large
-GeneratedTests.l2_normalization_quant8_nnfw
-GeneratedTests.l2_pool_float
-GeneratedTests.l2_pool_float_2
-GeneratedTests.l2_pool_float_large
-GeneratedTests.less_boolean
-GeneratedTests.less_broadcast
-GeneratedTests.less_broadcast_quant8_nnfw
-GeneratedTests.less_dynamic_float_nnfw
-GeneratedTests.less_equal_broadcast_quant8_nnfw
-GeneratedTests.less_equal_dynamic_float_nnfw
-GeneratedTests.less_equal_quant8_nnfw
-GeneratedTests.less_nnfw
-GeneratedTests.less_quant8_nnfw
-GeneratedTests.less_quantized_different_scale
-GeneratedTests.less_quantized_different_zero_point
-GeneratedTests.less_quantized_overflow_first_input_if_requantized
-GeneratedTests.less_quantized_overflow_second_input_if_requantized
-GeneratedTests.less_simple
-GeneratedTests.local_response_norm_float_1
-GeneratedTests.local_response_norm_float_2
-GeneratedTests.local_response_norm_float_3
-GeneratedTests.local_response_norm_float_4
-GeneratedTests.log_4D_float_nnfw
-GeneratedTests.log_dynamic_nnfw
-GeneratedTests.log_softmax_nnfw
-GeneratedTests.log_softmax_nnfw_2
-GeneratedTests.log_softmax_nnfw_3
-GeneratedTests.log_softmax_nnfw_4
-GeneratedTests.log_softmax_nnfw_5
-GeneratedTests.log_softmax_nnfw_quant8
-GeneratedTests.logical_and_1D_nnfw
-GeneratedTests.logical_and_2D_nnfw
-GeneratedTests.logical_and_3D_nnfw
-GeneratedTests.logical_and_4D_nnfw
-GeneratedTests.logical_and_broadcast
-GeneratedTests.logical_and_broadcast_4D_2D_nnfw
-GeneratedTests.logical_and_broadcast_nnfw
-GeneratedTests.logical_and_simple
-GeneratedTests.logical_not
-GeneratedTests.logical_not_1D_nnfw
-GeneratedTests.logical_not_4D_nnfw
-GeneratedTests.logical_not_dynamic_nnfw
-GeneratedTests.logical_or_1D_nnfw
-GeneratedTests.logical_or_2D_nnfw
-GeneratedTests.logical_or_3D_nnfw
-GeneratedTests.logical_or_4D_nnfw
-GeneratedTests.logical_or_broadcast
-GeneratedTests.logical_or_broadcast_4D_2D_nnfw
-GeneratedTests.logical_or_broadcast_nnfw
-GeneratedTests.logical_or_dynamic_nnfw
-GeneratedTests.logical_or_simple
-GeneratedTests.logistic_dynamic_nnfw
-GeneratedTests.logistic_quant8_1
-GeneratedTests.logistic_quant8_2
-GeneratedTests.lsh_projection
-GeneratedTests.lsh_projection_2
-GeneratedTests.lsh_projection_weights_as_inputs
-GeneratedTests.lstm
-GeneratedTests.lstm2
-GeneratedTests.lstm2_state
-GeneratedTests.lstm2_state2
-GeneratedTests.lstm3
-GeneratedTests.lstm3_state
-GeneratedTests.lstm3_state2
-GeneratedTests.lstm3_state3
-GeneratedTests.lstm_state
-GeneratedTests.lstm_state2
-GeneratedTests.matrix_band_part_ex_4D_float
-GeneratedTests.matrix_band_part_ex_dynamic_nnfw
-GeneratedTests.max_pool_quant8_1
-GeneratedTests.max_pool_quant8_2
-GeneratedTests.max_pool_quant8_3
-GeneratedTests.max_pool_quant8_4
-GeneratedTests.maximum_broadcast
-GeneratedTests.maximum_broadcast_quant8
-GeneratedTests.maximum_dynamic_nnfw
-GeneratedTests.maximum_overflow
-GeneratedTests.maximum_quant8_nnfw
-GeneratedTests.maximum_simple
-GeneratedTests.maximum_simple_quant8
-GeneratedTests.mean
-GeneratedTests.mean_4D_float_reducing_C_nnfw
-GeneratedTests.mean_4D_float_reducing_HW_nnfw
-GeneratedTests.mean_axis01_1_nnfw
-GeneratedTests.mean_axis01_2_nnfw
-GeneratedTests.mean_float_1
-GeneratedTests.mean_float_2
-GeneratedTests.mean_quant8_1
-GeneratedTests.mean_quant8_2
-GeneratedTests.minimum_broadcast
-GeneratedTests.minimum_broadcast_quant8
-GeneratedTests.minimum_dynamic_nnfw
-GeneratedTests.minimum_overflow
-GeneratedTests.minimum_quant8_nnfw
-GeneratedTests.minimum_simple
-GeneratedTests.minimum_simple_quant8
-GeneratedTests.minimum_int32
-GeneratedTests.mul_broadcast_quant8
-GeneratedTests.mul_dynamic_nnfw
-GeneratedTests.mul_quant8
-GeneratedTests.neg
-GeneratedTests.neg_1D_float_nnfw
-GeneratedTests.neg_2D_float_nnfw
-GeneratedTests.neg_3D_float_nnfw
-GeneratedTests.neg_3D_int_nnfw
-GeneratedTests.neg_4D_float_nnfw
-GeneratedTests.neg_4D_int_nnfw
-GeneratedTests.neg_dynamic_nnfw
-GeneratedTests.not_equal_boolean
-GeneratedTests.not_equal_broadcast
-GeneratedTests.not_equal_broadcast_4D_2D_float_nnfw
-GeneratedTests.not_equal_broadcast_float_nnfw
-GeneratedTests.not_equal_broadcast_quant8_nnfw
-GeneratedTests.not_equal_dynamic_float_nnfw
-GeneratedTests.not_equal_float_nnfw
-GeneratedTests.not_equal_quant8_nnfw
-GeneratedTests.not_equal_quantized_different_scale
-GeneratedTests.not_equal_quantized_different_zero_point
-GeneratedTests.not_equal_quantized_overflow_first_input_if_requantized
-GeneratedTests.not_equal_quantized_overflow_second_input_if_requantized
-GeneratedTests.not_equal_simple
-GeneratedTests.one_hot_ex_dynamic_nnfw
-GeneratedTests.one_hot_ex_float_1_nnfw
-GeneratedTests.one_hot_ex_float_2_nnfw
-GeneratedTests.one_hot_ex_float_off_value_constant_zero_nnfw
-GeneratedTests.pack_ex_2D_float_1
-GeneratedTests.pack_ex_2D_float_2
-GeneratedTests.pack_ex_2D_int_1
-GeneratedTests.pack_ex_2D_int_2
-GeneratedTests.pack_ex_dynamic_nnfw
-GeneratedTests.pad_dynamic_nnfw
-GeneratedTests.pad_quant8_nnfw
-GeneratedTests.pad_v2_1_float
-GeneratedTests.pad_v2_1_quant8
-GeneratedTests.pad_v2_all_dims
-GeneratedTests.pad_v2_all_dims_quant8
-GeneratedTests.pad_v2_low_rank
-GeneratedTests.pad_v2_low_rank_quant8
-GeneratedTests.pow_2D_float_nnfw
-GeneratedTests.pow_broadcast_float_nnfw
-GeneratedTests.pow_broadcast_float_nnfw_2
-GeneratedTests.pow_broadcast_float_nnfw_3
-GeneratedTests.pow_dynamic_nnfw
-GeneratedTests.prelu
-GeneratedTests.prelu_broadcast_float_1_nnfw
-GeneratedTests.prelu_broadcast_quant8_1_nnfw
-GeneratedTests.prelu_float_1_nnfw
-GeneratedTests.prelu_quant8
-GeneratedTests.prelu_quant8_1_nnfw
-GeneratedTests.prelu_quant8_2
-GeneratedTests.prelu_quant8_3
-GeneratedTests.prelu_quant8_4
-GeneratedTests.prelu_weight_as_input
-GeneratedTests.prelu_weight_as_input_quant8
-GeneratedTests.prelu_weight_as_input_quant8_2
-GeneratedTests.prelu_weight_as_input_quant8_3
-GeneratedTests.prelu_weight_as_input_quant8_4
-GeneratedTests.quantize_quant8
-GeneratedTests.quantize_quant8_2
-GeneratedTests.quantize_quant8_3
-GeneratedTests.quantize_quant8_4
-GeneratedTests.quantize_quant8_5
-GeneratedTests.quantize_quant8_6
-GeneratedTests.quantize_quant8_7
-GeneratedTests.quantize_quant8_8
-GeneratedTests.quantize_zero_sized
-GeneratedTests.range_ex_float_1
-GeneratedTests.range_ex_float_1_all_constant_inputs
-GeneratedTests.range_ex_float_1_dynamic_nnfw
-GeneratedTests.range_ex_float_2
-GeneratedTests.range_ex_float_2_dynamic_nnfw
-GeneratedTests.reduce_all
-GeneratedTests.reduce_all_2
-GeneratedTests.reduce_all_2D_nnfw
-GeneratedTests.reduce_all_3
-GeneratedTests.reduce_all_4D_nnfw
-GeneratedTests.reduce_all_dynamic_nnfw
-GeneratedTests.reduce_any
-GeneratedTests.reduce_any_2
-GeneratedTests.reduce_any_2D_nnfw
-GeneratedTests.reduce_any_3
-GeneratedTests.reduce_any_4D_nnfw
-GeneratedTests.reduce_max
-GeneratedTests.reduce_max_2
-GeneratedTests.reduce_max_2D_float_nnfw
-GeneratedTests.reduce_max_2D_int32_nnfw
-GeneratedTests.reduce_max_3
-GeneratedTests.reduce_max_4
-GeneratedTests.reduce_max_4D_float_reducing_C_nnfw
-GeneratedTests.reduce_max_4D_float_reducing_HW_nnfw
-GeneratedTests.reduce_max_float_1_nnfw
-GeneratedTests.reduce_max_float_2_nnfw
-GeneratedTests.reduce_max_float_nnfw
-GeneratedTests.reduce_max_quant8
-GeneratedTests.reduce_max_quant8_1_nnfw
-GeneratedTests.reduce_max_quant8_2
-GeneratedTests.reduce_max_quant8_2_nnfw
-GeneratedTests.reduce_max_quant8_3
-GeneratedTests.reduce_max_quant8_4
-GeneratedTests.reduce_mean_dynamic_1_nnfw
-GeneratedTests.reduce_mean_dynamic_2_nnfw
-GeneratedTests.reduce_min
-GeneratedTests.reduce_min_2
-GeneratedTests.reduce_min_3
-GeneratedTests.reduce_min_4
-GeneratedTests.reduce_min_dynamic_nnfw
-GeneratedTests.reduce_min_float_1_nnfw
-GeneratedTests.reduce_min_float_2_nnfw
-GeneratedTests.reduce_min_float_nnfw
-GeneratedTests.reduce_min_quant8
-GeneratedTests.reduce_min_quant8_2
-GeneratedTests.reduce_min_quant8_3
-GeneratedTests.reduce_min_quant8_4
-GeneratedTests.reduce_prod
-GeneratedTests.reduce_prod_2
-GeneratedTests.reduce_prod_2D_float_nnfw
-GeneratedTests.reduce_prod_3
-GeneratedTests.reduce_prod_4
-GeneratedTests.reduce_prod_4D_float_nnfw
-GeneratedTests.reduce_prod_4D_float_reducing_C_nnfw
-GeneratedTests.reduce_prod_4D_float_reducing_HW_nnfw
-GeneratedTests.reduce_prod_dynamic_1_nnfw
-GeneratedTests.reduce_prod_dynamic_2_nnfw
-GeneratedTests.reduce_sum
-GeneratedTests.reduce_sum_2
-GeneratedTests.reduce_sum_2D_float_nnfw
-GeneratedTests.reduce_sum_3
-GeneratedTests.reduce_sum_4
-GeneratedTests.reduce_sum_4D_float_nnfw
-GeneratedTests.reduce_sum_4D_float_reducing_C_nnfw
-GeneratedTests.reduce_sum_4D_float_reducing_HW_nnfw
-GeneratedTests.reduce_sum_dynamic_1_nnfw
-GeneratedTests.reduce_sum_dynamic_2_nnfw
-GeneratedTests.relu1_quant8_1
-GeneratedTests.relu1_quant8_2
-GeneratedTests.relu6_quant8_1
-GeneratedTests.relu6_quant8_2
-GeneratedTests.relu_quant8_1
-GeneratedTests.relu_quant8_2
-GeneratedTests.reshape_dynamic_nnfw
-GeneratedTests.resize_bilinear
-GeneratedTests.resize_bilinear_2
-GeneratedTests.resize_bilinear_quant8_nnfw
-GeneratedTests.resize_nearest_neighbor_shape_nhwc
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8
-GeneratedTests.resize_nearest_neighbor_shape_nchw
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8
-GeneratedTests.resize_nearest_neighbor_scale_nhwc
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8
-GeneratedTests.resize_nearest_neighbor_scale_nchw
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_2
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_2
-GeneratedTests.resize_nearest_neighbor_shape_nchw_2
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_2
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_2
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_2
-GeneratedTests.resize_nearest_neighbor_scale_nchw_2
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_2
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_3
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_3
-GeneratedTests.resize_nearest_neighbor_shape_nchw_3
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_3
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_3
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_3
-GeneratedTests.resize_nearest_neighbor_scale_nchw_3
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_3
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_4
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_4
-GeneratedTests.resize_nearest_neighbor_shape_nchw_4
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_4
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_4
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_4
-GeneratedTests.resize_nearest_neighbor_scale_nchw_4
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_4
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_5
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_5
-GeneratedTests.resize_nearest_neighbor_shape_nchw_5
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_5
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_5
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_5
-GeneratedTests.resize_nearest_neighbor_scale_nchw_5
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_5
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_6
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_6
-GeneratedTests.resize_nearest_neighbor_shape_nchw_6
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_6
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_6
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_6
-GeneratedTests.resize_nearest_neighbor_scale_nchw_6
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_6
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_7
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_7
-GeneratedTests.resize_nearest_neighbor_shape_nchw_7
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_7
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_7
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_7
-GeneratedTests.resize_nearest_neighbor_scale_nchw_7
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_7
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_8
-GeneratedTests.resize_nearest_neighbor_shape_nhwc_quant8_8
-GeneratedTests.resize_nearest_neighbor_shape_nchw_8
-GeneratedTests.resize_nearest_neighbor_shape_nchw_quant8_8
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_8
-GeneratedTests.resize_nearest_neighbor_scale_nhwc_quant8_8
-GeneratedTests.resize_nearest_neighbor_scale_nchw_8
-GeneratedTests.resize_nearest_neighbor_scale_nchw_quant8_8
-GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc
-GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8
-GeneratedTests.resize_nearest_neighbor_zero_sized_nchw
-GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8
-GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_2
-GeneratedTests.resize_nearest_neighbor_zero_sized_nhwc_quant8_2
-GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_2
-GeneratedTests.resize_nearest_neighbor_zero_sized_nchw_quant8_2
-GeneratedTests.reverse_ex_1d
-GeneratedTests.reverse_ex_3d
-GeneratedTests.reverse_ex_dynamic_1D
-GeneratedTests.reverse_ex_dynamic_3D
-GeneratedTests.rnn
-GeneratedTests.rnn_state
-GeneratedTests.round_ex_1D_float
-GeneratedTests.round_ex_4D_float
-GeneratedTests.round_ex_dynamic_nnfw
-GeneratedTests.rsqrt
-GeneratedTests.rsqrt_1D_float_nnfw
-GeneratedTests.rsqrt_2D_float_nnfw
-GeneratedTests.rsqrt_3D_float_nnfw
-GeneratedTests.rsqrt_4D_float_nnfw
-GeneratedTests.rsqrt_dynamic_nnfw
-GeneratedTests.select_v1_2_five_dim
-GeneratedTests.select_v1_2_five_dim_quant8
-GeneratedTests.select_v1_2_one_dim
-GeneratedTests.select_v1_2_one_dim_quant8
-GeneratedTests.select_v1_2_two_dim
-GeneratedTests.select_v1_2_two_dim_quant8
-GeneratedTests.select_v2_ex_broadcast_1d_single_value
-GeneratedTests.select_v2_ex_broadcast_2d_one
-GeneratedTests.select_v2_ex_broadcast_2d_two
-GeneratedTests.select_v2_ex_broadcast_2d_two_dynamic_nnfw
-GeneratedTests.select_v2_ex_broadcast_less_4d
-GeneratedTests.select_v2_ex_float
-GeneratedTests.shape_ex_dynamic_nnfw
-GeneratedTests.sin_1D_float_nnfw
-GeneratedTests.sin_4D_float_nnfw
-GeneratedTests.sin_dynamic_nnfw
-GeneratedTests.slice
-GeneratedTests.slice_2
-GeneratedTests.slice_3
-GeneratedTests.slice_4
-GeneratedTests.slice_5
-GeneratedTests.slice_6
-GeneratedTests.slice_7
-GeneratedTests.slice_8
-GeneratedTests.slice_dynamic_nnfw
-GeneratedTests.slice_zero_sized
-GeneratedTests.slice_zero_sized_quant8
-GeneratedTests.softmax_dynamic_nnfw
-GeneratedTests.softmax_quant8_1
-GeneratedTests.softmax_quant8_2
-GeneratedTests.space_to_batch
-GeneratedTests.space_to_batch_dynamic_float_nnfw
-GeneratedTests.space_to_batch_float_1
-GeneratedTests.space_to_batch_float_1_nnfw
-GeneratedTests.space_to_batch_float_2
-GeneratedTests.space_to_batch_float_3
-GeneratedTests.space_to_batch_quant8_1
-GeneratedTests.space_to_batch_quant8_1_nnfw
-GeneratedTests.space_to_batch_quant8_2
-GeneratedTests.space_to_batch_quant8_2_nnfw
-GeneratedTests.space_to_batch_quant8_3
-GeneratedTests.space_to_depth_float_1
-GeneratedTests.space_to_depth_float_2
-GeneratedTests.space_to_depth_float_3
-GeneratedTests.space_to_depth_quant8_1
-GeneratedTests.space_to_depth_quant8_2
-GeneratedTests.split_1D_float_nnfw
-GeneratedTests.split_1D_int32_nnfw
-GeneratedTests.split_4D_float_1_nnfw
-GeneratedTests.split_4D_float_2_nnfw
-GeneratedTests.split_4D_float_3_nnfw
-GeneratedTests.split_4D_int32_1_nnfw
-GeneratedTests.split_4D_int32_2_nnfw
-GeneratedTests.split_4D_int32_3_nnfw
-GeneratedTests.split_4D_int32_4_nnfw
-GeneratedTests.split_4D_int32_5_nnfw
-GeneratedTests.split_4D_quant8_nnfw
-GeneratedTests.split_dynamic_float_nnfw
-GeneratedTests.split_float_1
-GeneratedTests.split_float_2
-GeneratedTests.split_float_3
-GeneratedTests.split_float_4
-GeneratedTests.split_float_5
-GeneratedTests.split_float_5_axis_as_input_nnfw
-GeneratedTests.split_int32_1
-GeneratedTests.split_int32_1_relaxed
-GeneratedTests.split_int32_2
-GeneratedTests.split_int32_2_relaxed
-GeneratedTests.split_int32_3
-GeneratedTests.split_int32_3_relaxed
-GeneratedTests.split_int32_4
-GeneratedTests.split_int32_4_relaxed
-GeneratedTests.split_quant8_1
-GeneratedTests.split_quant8_1_relaxed
-GeneratedTests.split_quant8_2
-GeneratedTests.split_quant8_2_relaxed
-GeneratedTests.split_quant8_3
-GeneratedTests.split_quant8_4
-GeneratedTests.split_v_ex_1D_float_1_nnfw
-GeneratedTests.split_v_ex_1D_float_2_nnfw
-GeneratedTests.split_v_ex_1D_int32_nnfw
-GeneratedTests.split_v_ex_4D_float_1_nnfw
-GeneratedTests.split_v_ex_4D_float_2_nnfw
-GeneratedTests.split_v_ex_4D_float_3_nnfw
-GeneratedTests.split_v_ex_4D_float_4_nnfw
-GeneratedTests.split_v_ex_4D_int32_1_nnfw
-GeneratedTests.split_v_ex_4D_int32_2_nnfw
-GeneratedTests.split_v_ex_4D_int32_3_nnfw
-GeneratedTests.split_v_ex_4D_int32_4_nnfw
-GeneratedTests.sqrt_
-GeneratedTests.sqrt_1D_float_nnfw
-GeneratedTests.sqrt_2D_float_nnfw
-GeneratedTests.sqrt_3D_float_nnfw
-GeneratedTests.sqrt_4D_float_nnfw
-GeneratedTests.squared_difference_ex_1D_float
-GeneratedTests.squared_difference_ex_2D_float
-GeneratedTests.squared_difference_ex_3D_float
-GeneratedTests.squared_difference_ex_4D_float
-GeneratedTests.squared_difference_ex_broadcast_4D_2D_float
-GeneratedTests.squared_difference_ex_broadcast_float
-GeneratedTests.squared_difference_ex_dynamic_nnfw
-GeneratedTests.squeeze
-GeneratedTests.squeeze_2D_float_1_nnfw
-GeneratedTests.squeeze_dynamic_float_nnfw
-GeneratedTests.squeeze_float_1
-GeneratedTests.squeeze_float_1_relaxed
-GeneratedTests.squeeze_quant8_1
-GeneratedTests.squeeze_relaxed
-GeneratedTests.stateless_random_uniform_ex_nnfw
-GeneratedTests.strided_slice
-GeneratedTests.strided_slice_dynamic_nnfw
-GeneratedTests.strided_slice_float_1
-GeneratedTests.strided_slice_float_10
-GeneratedTests.strided_slice_float_11
-GeneratedTests.strided_slice_float_2
-GeneratedTests.strided_slice_float_3
-GeneratedTests.strided_slice_float_4
-GeneratedTests.strided_slice_float_5
-GeneratedTests.strided_slice_float_6
-GeneratedTests.strided_slice_float_7
-GeneratedTests.strided_slice_float_8
-GeneratedTests.strided_slice_float_9
-GeneratedTests.strided_slice_qaunt8_10
-GeneratedTests.strided_slice_qaunt8_11
-GeneratedTests.strided_slice_quant8_1
-GeneratedTests.strided_slice_quant8_2
-GeneratedTests.strided_slice_quant8_3
-GeneratedTests.strided_slice_quant8_4
-GeneratedTests.strided_slice_quant8_5
-GeneratedTests.strided_slice_quant8_6
-GeneratedTests.strided_slice_quant8_7
-GeneratedTests.strided_slice_quant8_8
-GeneratedTests.strided_slice_quant8_9
-GeneratedTests.sub_dynamic_nnfw
-GeneratedTests.sub_v1_2_broadcast_quant8
-GeneratedTests.sub_v1_2_quant8
-GeneratedTests.sub_v1_2_zero_sized
-GeneratedTests.sub_v1_2_zero_sized_quant8
-GeneratedTests.svdf
-GeneratedTests.svdf2
-GeneratedTests.svdf_bias_present
-GeneratedTests.svdf_state
-GeneratedTests.tanh_v1_2
-GeneratedTests.tanh_v1_2_2
-GeneratedTests.tanh_v1_2_zero_sized
-GeneratedTests.tanh_v1_2_zero_sized_quant8
-GeneratedTests.tanh_v1_dynamic_nnfw
-GeneratedTests.tile_1
-GeneratedTests.tile_1_dynamic_float32_nnfw
-GeneratedTests.tile_1_float16
-GeneratedTests.tile_1_quant8
-GeneratedTests.tile_2
-GeneratedTests.tile_2_dynamic_float32_nnfw
-GeneratedTests.tile_2_float16
-GeneratedTests.tile_2_int32
-GeneratedTests.tile_2_quant8
-GeneratedTests.tile_3
-GeneratedTests.tile_3_dynamic_float32_nnfw
-GeneratedTests.tile_3_float16
-GeneratedTests.tile_3_int32
-GeneratedTests.tile_3_quant8
-GeneratedTests.topk_v2
-GeneratedTests.topk_v2_1D_float_nnfw
-GeneratedTests.topk_v2_1D_int32_nnfw
-GeneratedTests.topk_v2_1D_quant8_nnfw
-GeneratedTests.topk_v2_2
-GeneratedTests.topk_v2_2D_float_nnfw
-GeneratedTests.topk_v2_2D_int32_nnfw
-GeneratedTests.topk_v2_2D_quant8_nnfw
-GeneratedTests.topk_v2_3
-GeneratedTests.topk_v2_4
-GeneratedTests.topk_v2_5
-GeneratedTests.topk_v2_6
-GeneratedTests.transpose
-GeneratedTests.transpose_2D_nnfw
-GeneratedTests.transpose_3D_nnfw
-GeneratedTests.transpose_dynamic_nnfw
-GeneratedTests.transpose_float_1
-GeneratedTests.transpose_float_1_perms_as_input_nnfw
-GeneratedTests.transpose_quant8_1
-GeneratedTests.transpose_v1_2
-GeneratedTests.transpose_v1_2_quant8
-GeneratedTests.transpose_v1_2_zero_sized
-GeneratedTests.transpose_v1_2_zero_sized_quant8
-GeneratedTests.unidirectional_sequence_lstm_1step
-GeneratedTests.unidirectional_sequence_lstm_batch_major_norm_peephole_projection
-GeneratedTests.unidirectional_sequence_lstm_batch_major_peephole_projection_bias
-GeneratedTests.unidirectional_sequence_lstm_dynamic_nnfw
-GeneratedTests.unidirectional_sequence_lstm_layer_norm_cifg_peephole
-GeneratedTests.unidirectional_sequence_lstm_norm_peephole_projection
-GeneratedTests.unpack_ex_3D_float_1
-GeneratedTests.unpack_ex_3D_float_2
-GeneratedTests.unpack_ex_3D_int_1
-GeneratedTests.unpack_ex_3D_int_2
-GeneratedTests.unpack_ex_dynamic_nnfw
-GeneratedTests.zeros_like_ex_2D_float
-GeneratedTests.zeros_like_ex_4D_int32
-GeneratedTests.zeros_like_ex_dynamic_float32
//#include "NeuralNetworksWrapperExtensions.h"
#include <math.h>
-// Fix for onert: use boost::optional instead of std::optional
-// TODO in onert: introduce and use internal optional library
-#include <boost/optional.hpp>
+#include <optional>
#include <string>
#include <vector>
target_link_libraries(${RUNTIME_NNFW_API_TEST} ${LIB_PTHREAD} dl)
target_link_libraries(${RUNTIME_NNFW_API_TEST} circle_schema)
-install(TARGETS ${RUNTIME_NNFW_API_TEST} DESTINATION unittest_standalone)
+install(TARGETS ${RUNTIME_NNFW_API_TEST} DESTINATION unittest)
# Install nnpackage test model (add)
set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/nnpackage/examples/v1.0.0/add)
-set(NNPACKAGE_INSTALL_TARGET unittest_standalone/nnfw_api_gtest_models)
+set(NNPACKAGE_INSTALL_TARGET unittest/nnfw_api_gtest_models)
install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/add)
# Install nnpackage test model (mobilenet)
set(NNPACKAGE_MODEL_DIR ${NNAS_PROJECT_SOURCE_DIR}/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/)
-set(NNPACKAGE_INSTALL_TARGET unittest_standalone/nnfw_api_gtest_models)
-
install(DIRECTORY ${NNPACKAGE_MODEL_DIR} DESTINATION ${NNPACKAGE_INSTALL_TARGET}/mobilenet_v1_1.0_224)
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "USE_SCHEDULER", "1"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "PROFILING_MODE", "0"));
NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "PROFILING_MODE", "1"));
- NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "DISABLE_COMPILE", "0"));
- NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "DISABLE_COMPILE", "1"));
SUCCEED();
}
+++ /dev/null
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "fixtures.h"
-#include "common.h"
-#include <fstream>
-#include <stdio.h>
-#include <json/json.h>
-#include <thread>
-
-void build_partition_map()
-{
- Json::Value root;
- Json::Value graphs(Json::arrayValue);
- int num = 31;
-
- for (int i = 0; i < num; i++)
- {
- if (i < 7)
- graphs.append(Json::Value(0));
- else
- graphs.append(Json::Value(1));
- }
-
- root["partition_map"] = graphs;
- root["num_partitions"] = 2;
-
- Json::StyledWriter sw;
- std::string jsonString = sw.write(root);
-
- FILE *pFile = NULL;
-
- pFile = fopen("./partition_map.json", "wt");
- fwrite(jsonString.c_str(), jsonString.length(), 1, pFile);
- fclose(pFile);
-}
-
-TEST_F(ValidationTestPipelineSession, create_pipeline_001)
-{
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
- SUCCEED();
-}
-
-TEST_F(ValidationTestPipelineSession, pipeline_session_test_model)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
- SUCCEED();
-}
-
-TEST_F(ValidationTestPipelineSession, prepare_pipeline_001)
-{
- std::ifstream readFile("./partition_map.json");
-
- if (readFile.good())
- {
- remove("./partition_map.json");
- }
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- ASSERT_EQ(nnfw_prepare_pipeline(_session, "./partition_map.json"), NNFW_STATUS_ERROR);
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-}
-
-TEST_F(ValidationTestPipelineSession, prepare_pipeline_002)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-
- SUCCEED();
-}
-
-TEST_F(ValidationTestPipelineSession, input_tensorinfo_pipeline)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- nnfw_tensorinfo t_input;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &t_input));
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-
- SUCCEED();
-}
-
-TEST_F(ValidationTestPipelineSession, output_tensorinfo_pipeline)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- nnfw_tensorinfo t_output;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &t_output));
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-
- SUCCEED();
-}
-
-TEST_F(ValidationTestPipelineSession, input_size_pipeline)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- uint32_t input_num = -1;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- NNFW_ENSURE_SUCCESS(nnfw_input_size(_session, &input_num));
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- ASSERT_EQ(input_num, 1);
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, output_size_pipeline)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- uint32_t output_num = -1;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- NNFW_ENSURE_SUCCESS(nnfw_output_size(_session, &output_num));
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- ASSERT_EQ(output_num, 1);
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, set_input_tensorinfo_pipeline)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- nnfw_tensorinfo t_input_original;
- nnfw_tensorinfo t_input_after;
- nnfw_tensorinfo t_input = {NNFW_TYPE_TENSOR_FLOAT32, 4, {1, 224, 224, 3}};
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &t_input_original));
- NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &t_input));
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &t_input_after));
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- ASSERT_TRUE(tensorInfoEqual(t_input_original, t_input_after));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, input_output_tensorindex)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- uint32_t input_index = 100;
- NNFW_ENSURE_SUCCESS(nnfw_input_tensorindex(_session, "input", &input_index));
- ASSERT_EQ(input_index, 0);
-
- uint32_t output_index = 100;
- NNFW_ENSURE_SUCCESS(
- nnfw_output_tensorindex(_session, "MobilenetV1/Predictions/Reshape_1", &output_index));
- ASSERT_EQ(output_index, 0);
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_create_pipeline_001)
-{
- ASSERT_EQ(nnfw_create_session(nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestPipelineSession, neg_pipeline_session_model_load)
-{
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
-
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- ASSERT_EQ(nnfw_load_model_from_modelfile(
- nullptr, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()),
- NNFW_STATUS_UNEXPECTED_NULL);
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-}
-
-TEST_F(ValidationTestPipelineSession, neg_prepare_pipeline_001)
-{
- ASSERT_EQ(nnfw_prepare_pipeline(nullptr, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
-}
-
-TEST_F(ValidationTestPipelineSession, neg_set_in_pipeline)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- float input_buf[1 * 224 * 224 * 3];
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input_buf, sizeof(input_buf)),
- NNFW_STATUS_ERROR);
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_set_out_pipeline)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- float output_buf[1 * 1001];
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, output_buf, sizeof(output_buf)),
- NNFW_STATUS_ERROR);
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_input_tensorinfo_pipeline_001)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- nnfw_tensorinfo t_input;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- ASSERT_EQ(nnfw_input_tensorinfo(nullptr, 0, &t_input), NNFW_STATUS_UNEXPECTED_NULL);
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_input_tensorinfo_pipeline_002)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- nnfw_tensorinfo t_input;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- ASSERT_EQ(nnfw_input_tensorinfo(_session, 1, &t_input), NNFW_STATUS_ERROR);
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_output_tensorinfo_pipeline_001)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- nnfw_tensorinfo t_output;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- ASSERT_EQ(nnfw_output_tensorinfo(nullptr, 0, &t_output), NNFW_STATUS_UNEXPECTED_NULL);
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_output_tensorinfo_pipeline_002)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- nnfw_tensorinfo t_output;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 1, &t_output), NNFW_STATUS_ERROR);
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_input_output_size_pipeline)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- uint32_t input_num = -1;
- uint32_t output_num = -1;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- ASSERT_EQ(nnfw_input_size(nullptr, &input_num), NNFW_STATUS_UNEXPECTED_NULL);
- ASSERT_EQ(input_num, -1);
- ASSERT_EQ(nnfw_output_size(nullptr, &output_num), NNFW_STATUS_UNEXPECTED_NULL);
- ASSERT_EQ(output_num, -1);
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_set_input_tensorinfo_pipeline)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
- nnfw_tensorinfo t_input = {NNFW_TYPE_TENSOR_FLOAT32, 4, {1, 224, 224, 3}};
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- ASSERT_EQ(nnfw_set_input_tensorinfo(nullptr, 0, &t_input), NNFW_STATUS_UNEXPECTED_NULL);
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_input_output_tensorindex)
-{
- std::vector<void *> dummy1;
- std::vector<uint32_t> dummy2;
-
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- uint32_t input_index = 100;
- ASSERT_EQ(nnfw_input_tensorindex(_session, "input1", &input_index), NNFW_STATUS_ERROR);
- ASSERT_EQ(input_index, 100);
-
- uint32_t output_index = 100;
- ASSERT_EQ(nnfw_output_tensorindex(_session, "MobilenetV1/Predictions/Reshape_2", &output_index),
- NNFW_STATUS_ERROR);
- ASSERT_EQ(output_index, 100);
-
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, &dummy1, &dummy2));
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, neg_run_pipeline)
-{
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- auto producer = [this]() {
- std::vector<void *> inputs;
- std::vector<uint32_t> lengths;
- inputs.clear();
- lengths.clear();
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, (void *)&inputs, (void *)&lengths));
- };
-
- auto consumer = [this]() {
- std::vector<void *> outputs;
- ASSERT_EQ(nnfw_pop_pipeline_output(_session, (void *)&outputs), NNFW_STATUS_ERROR);
- };
-
- auto producer_thread = std::thread(producer);
- auto consumer_thread = std::thread(consumer);
-
- producer_thread.join();
- consumer_thread.join();
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-}
-
-TEST_F(ValidationTestPipelineSession, run_pipeline)
-{
- build_partition_map();
-
- NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
- NNFW_ENSURE_SUCCESS(nnfw_load_model_from_modelfile(
- _session, NNPackages::get().getModelAbsoluteFilePath("mobilenet_v1_1.0_224").c_str()));
- NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
- NNFW_ENSURE_SUCCESS(nnfw_prepare_pipeline(_session, "./partition_map.json"));
-
- auto producer = [this]() {
- std::vector<void *> inputs;
- std::vector<uint32_t> lengths;
- inputs.clear();
- lengths.clear();
- NNFW_ENSURE_SUCCESS(nnfw_push_pipeline_input(_session, (void *)&inputs, (void *)&lengths));
- };
-
- auto producer_thread = std::thread(producer);
-
- producer_thread.join();
- NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
-
- remove("./partition_map.json");
-
- SUCCEED();
-}
- input.h5 (input data)
- expected.h5 (expected output data)
-`nnpkg_test` uses `nnpackage_run` internally to run `nnpackage`.
+`nnpkg_test` uses `onert_run` internally to run `nnpackage`.
Then, it compares through `difftool` (either `i5diff` or `h5diff`).
(dumped file are always deleted on success) (default=0)
Environment variables:
- nnpackage_run path to nnpackage_run (default=Product/out/bin/nnpackage_run)
- difftool path to i5diff or h5diff (default=h5diff)
+ onert_run path to onert_run (default=Product/out/bin/onert_run)
+ difftool path to i5diff or h5diff (default=h5diff)
Examples:
nnpkg_test.sh Add_000 => run ./Add_000 and check output
$BRIDGE shell rm $TEST_ROOT/nnpkg.tar.gz
# 1. Run
-$BRIDGE shell LD_LIBRARY_PATH=$TEST_ROOT/Product/out/lib TRACE_FILEPATH=$TEST_ROOT/trace.json BACKENDS=$BACKENDS $TEST_ROOT/Product/out/bin/nnpackage_run --nnpackage $NNPKG_PATH_TARGET -r $NUM_RUNS
+$BRIDGE shell LD_LIBRARY_PATH=$TEST_ROOT/Product/out/lib TRACE_FILEPATH=$TEST_ROOT/trace.json BACKENDS=$BACKENDS $TEST_ROOT/Product/out/bin/onert_run --nnpackage $NNPKG_PATH_TARGET -r $NUM_RUNS
# 2. Pull result file
echo "Pulling data from target to trace.json"
+++ /dev/null
-#!/bin/bash
-#
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-source $MY_PATH/common.sh
-
-BENCHMARK_DRIVER_BIN=
-BENCHMARK_REPORT_DIR=
-BENCHMARK_MODELS_FILE=
-MODEL_TEST_ROOT_PATH=
-TEST_OP="false"
-BENCHMARK_MODEL_LIST="MODELS/inception_nonslim MODELS/inception_slim MODELS/mobilenet"
-BACKEND_LIST="acl_cl acl_neon cpu" #TODO: accept this list as argument
-EXECUTORS="Linear Parallel" #TODO: accept this list as argument
-
-function Usage()
-{
- echo "Usage: ./$0 --reportdir=. --driverbin=Product/out/bin/tflite_run"
-}
-
-for i in "$@"
-do
- case $i in
- -h|--help|help)
- Usage
- exit 1
- ;;
- --test_op)
- TEST_OP="true"
- ;;
- --driverbin=*)
- BENCHMARK_DRIVER_BIN=${i#*=}
- ;;
- --reportdir=*)
- BENCHMARK_REPORT_DIR=${i#*=}
- BENCHMARK_MODELS_FILE=$BENCHMARK_REPORT_DIR/benchmark_models.txt
- ;;
- --modelfilepath=*)
- TEST_LIST_PATH=${i#*=}
- MODEL_TEST_ROOT_PATH=$TEST_LIST_PATH/tests
- ;;
- esac
- shift
-done
-
-function get_benchmark_op_list()
-{
- local TEST_DIRS="$@"
- local TESTS_TO_RUN=""
-
- if [[ $# -eq 0 ]]; then
- TEST_DIRS="."
- fi
-
- shift $#
-
- pushd $MODEL_TEST_ROOT_PATH > /dev/null
- for DIR in $TEST_DIRS; do
- if [ -d "$DIR" ]; then
- TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | grep -v '^MODELS/' | sort)
- TESTS_TO_RUN="$TESTS_TO_RUN $TESTS_FOUND"
- fi
- done
- popd > /dev/null
-
- BENCHMARK_MODEL_LIST=$(echo "${TESTS_TO_RUN}")
-}
-
-function profile_for_he_shed()
-{
-
- local REPORT_MODEL_DIR=$1
- local RUN_TEST_SH=$2
- local BENCHMARK_DRIVER_BIN=$3
- local MODEL=$4
- local PROFILING_RUN_CNT=$5
-
- export USE_SCHEDULER=1
- export PROFILING_MODE=1
- export EXECUTOR="Dataflow"
- export ONERT_LOG_ENABLE=1
-
- rm "exec_time.json" 2>/dev/null
- for ((j = 1 ; j <= $PROFILING_RUN_CNT ; j++)); do
- # Save the verbose log of each run
- LOG_FILE=$REPORT_MODEL_DIR/tflite_profiling_$j.txt
-
- print_with_dots "Profiling run #$j out of $PROFILING_RUN_CNT"
-
- $RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1
- RET=$?
- if [[ $RET -ne 0 ]]; then
- echo "Profiling $MODEL aborted in run#$j... exit code: $RET"
- exit $RET
- fi
- echo "finished"
- # Save the exec_time.json of each run
- cp "exec_time.json" $REPORT_MODEL_DIR/"exec_time_$j.json"
- done
- unset USE_SCHEDULER PROFILING_MODE EXECUTOR ONERT_LOG_ENABLE
-}
-
-function run_with_he_scheduler()
-{
- local REPORT_MODEL_DIR=$1
- local RUN_TEST_SH=$2
- local BENCHMARK_DRIVER_BIN=$3
- local MODEL=$4
- local EXECUTOR=$5
-
- LOG_FILE=$REPORT_MODEL_DIR/tflite_onert_with_he_scheduler_in_$EXECUTOR.txt
- export EXECUTOR=$EXECUTOR
- export GRAPH_DOT_DUMP=1
- export USE_SCHEDULER=1
- export ONERT_LOG_ENABLE=1
-
- print_with_dots "TFLite onert $EXECUTOR with HEScheduler"
-
- RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
- echo "$RESULT ms"
-
- mv "after_lower.dot" $REPORT_MODEL_DIR/"after_lower_$EXECUTOR.dot"
- unset EXECUTOR GRAPH_DOT_DUMP USE_SCHEDULER ONERT_LOG_ENABLE
-}
-
-function run_onert_with_all_config()
-{
- local MODEL=$1
- local REPORT_MODEL_DIR=$2
- local PAUSE_TIME_IN_SEC=$3
- local BENCHMARK_DRIVER_BIN=$4
- local EXECUTORS=$5
- local BACKEND_LIST=$6
-
- export USE_NNAPI=1
-
- # Run profiler BACKEND_CNT+1 times: on each run of the first BACKEND_CNT runs it will
- # collect metrics for one unmeasured backend. On the last run metrics for data transfer
- PROFILING_RUN_CNT=1
- BACKENDS_TO_USE=
- for backend in $BACKEND_LIST; do
- BACKENDS_TO_USE+=$backend';'
- ((++PROFILING_RUN_CNT))
- done
- export BACKENDS=$BACKENDS_TO_USE
- if [ "$TEST_OP" == "false" ]; then
- profile_for_he_shed $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN $MODEL $PROFILING_RUN_CNT
- fi
-
- for executor in $EXECUTORS; do
- export EXECUTOR=$executor
- if [ "$TEST_OP" == "false" ]; then
- run_with_he_scheduler $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN $MODEL $executor
- fi
- for backend in $BACKEND_LIST; do
- export OP_BACKEND_ALLOPS=$backend
- run_benchmark_and_print "tflite_onert_"$executor"_executor_$backend" "TFLite onert $executor Executor $backend"\
- $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN
- done
- done
- unset USE_NNAPI EXECUTOR OP_BACKEND_ALLOPS BACKENDS
-}
-
-function run_benchmark_test()
-{
- local LOG_FILE=
- local RESULT_FILE=
- local RESULT=
- local REPORT_MODEL_DIR=
-
- export COUNT=5
- export ONERT_LOG_ENABLE=1
- echo
- echo "============================================"
- echo
- date +'%Y-%m-%d %H:%M:%S %s'
- echo
- local i=0
- for MODEL in $BENCHMARK_MODEL_LIST; do
-
- STATUS="enabled"
- if [ "$TEST_OP" == "true" ]; then
- source $MODEL_TEST_ROOT_PATH/$MODEL/config.sh
- fi
-
- # Skip 'disabled' tests
- if [ $(tr '[:upper:]' '[:lower:]' <<< "$STATUS") == "disabled" ]; then
- continue
- fi
-
- echo "Benchmark test with `basename $BENCHMARK_DRIVER_BIN` & `echo $MODEL`"
- echo $MODEL >> $BENCHMARK_MODELS_FILE
-
- REPORT_MODEL_DIR=$BENCHMARK_REPORT_DIR/$MODEL
- mkdir -p $REPORT_MODEL_DIR
-
- # TFLite+CPU
- unset USE_NNAPI
- run_benchmark_and_print "tflite_cpu" "TFLite CPU" $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN
-
- # run onert
- if [ "$TEST_OP" == "true" ]; then
- # Operation test don't need to test each scheduler
- run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN "Linear" "$BACKEND_LIST"
- else
- run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN "$EXECUTORS" "$BACKEND_LIST"
- fi
-
- if [[ $i -ne $(echo $BENCHMARK_MODEL_LIST | wc -w)-1 ]]; then
- echo ""
- fi
- i=$((i+1))
- done
- echo "============================================"
- unset COUNT
-}
-
-if [ ! -e "$BENCHMARK_REPORT_DIR" ]; then
- mkdir -p $BENCHMARK_REPORT_DIR
-fi
-
-if [ "$TEST_OP" == "true" ]; then
- get_benchmark_op_list
-fi
-
-rm -rf $BENCHMARK_MODELS_FILE
-
-echo ""
-# print the result AND append to log file
-run_benchmark_test 2>&1 | tee -a onert_benchmarks.txt
-echo ""
#!/bin/bash
-usage()
-{
- echo "$0 <options>"
- echo "Options"
- echo "--nnpackage_run : specific nnpackage_run path"
- echo "--tflite_run : specific tflite_run path"
- echo "--dir : the dir path of models"
- echo "--list : the model list"
- echo "--out : the file name of out results"
- echo "--tv : for tv"
- exit 1
-}
+MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source $MY_PATH/common.sh
-scripts_dir="$( cd "$( dirname "${BASH_SOURCE}" )" && pwd )"
-nnfw_dir="${scripts_dir}/../.."
-nnpackage_run="${nnfw_dir}/Product/out/bin/nnpackage_run"
-tflite_run="${nnfw_dir}/Product/out/bin/tflite_run"
+# Caution: DO NOT USE "pipefail"
+# We should run test all nnpackages
+
+onert_run="$INSTALL_PATH/bin/onert_run"
+tflite_run="$INSTALL_PATH/bin/tflite_run"
base_name="$(basename $0)"
base_name="${base_name%.*}"
outfile="${base_name}_result.txt"
dir=""
-list="${scripts_dir}/list/${base_name}_model_list.txt"
+list="$INSTALL_PATH/test/list/benchmark_nnpkg_model_list.txt"
tv_on="false"
+# Print the option summary for this script, then terminate the script with
+# exit status 1. The defaults shown are the current values of $list and $outfile.
+function usage()
+{
+ echo "Usage: ${BASH_SOURCE[0]} [OPTIONS]"
+ echo "Options"
+ echo " --dir=PATH : the dir path of models"
+ echo " --list=FILE : the model list (default: $list)"
+ echo " --out=FILE : the file name of out results (default: $outfile)"
+ echo " --tv : for tv"
+ echo " --help : display this help message and exit"
+ # Exits here, so callers never regain control after calling usage
+ exit 1
+}
+
for i in "$@"
do
-case $i in
- --nnpackage_run=*)
- nnpackage_run="${i#*=}"
- ;;
- --tflite_run=*)
- tflite_run="${i#*=}"
- ;;
- --out=*)
- outfile="${i#*=}"
- ;;
- --dir=*)
- dir="${i#*=}"
- ;;
- --list=*)
- list="${i#*=}"
- ;;
- --tv)
- tv_on="true"
- ;;
- *)
- ;;
-esac
-shift
+ case $i in
+ --out=*)
+ outfile="${i#*=}"
+ ;;
+ --dir=*)
+ dir="${i#*=}"
+ ;;
+ --list=*)
+ list="${i#*=}"
+ ;;
+ --tv)
+ tv_on="true"
+ ;;
+ --help)
+ usage
+ exit 1
+ ;;
+ *)
+ ;;
+ esac
+ shift
done
-if ! [ -f ${nnpackage_run} ]; then
- echo "nnpackage_run file does not exists."
- usage
-fi
-
-if ! [ -f ${tflite_run} ]; then
- echo "tflite_run file does not exists."
- usage
-fi
-
if ! [ -f ${list} ]; then
echo "model list file does not exists."
usage
for i in "${model_lists[@]}"; do
echo "${i} result" | tee -a ${outfile}
- CMD="${nnpackage_run} -r 10 -m 1 -p 1"
+ CMD="${onert_run} -r 10 -m 1 -p 1"
# In TV mode (--tv) pass the extra "-g 1" argument to the runner.
# The assignment target must be the bare variable name: "${CMD}=..." would
# expand CMD first and make bash try to execute the result as a command.
if [ "$tv_on" == "true" ]; then
    CMD="${CMD} -g 1"
fi
echo "" >> ${outfile}
- TFLITE_CMD="LD_LIBRARY_PATH=./Product/out/lib THREAD=3 ${tflite_run} -r 10 -m 1 -p 1"
+ TFLITE_CMD="THREAD=3 ${tflite_run} -r 10 -m 1 -p 1"
if [ "$tv_on" == "true" ]; then
TFLITE_CMD="${TFLITE_CMD} -g 1"
fi
sleep 20 # for avoiding cpu overheated
done # ${model_lists}
-${scripts_dir}/merge_result_of_benchmark_nnpkg.py -i . -o . -l ${list}
+python3 $MY_PATH/merge_result_of_benchmark_nnpkg.py -i . -o . -l ${list}
--- /dev/null
+#!/bin/bash
+#
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+source $MY_PATH/common.sh
+
+# Caution: DO NOT USE "pipefail"
+# We should run test all operators
+
+ONERT_DRIVER_BIN=$INSTALL_PATH/bin/onert_run
+TFLITE_DRIVER_BIN=$INSTALL_PATH/bin/tflite_run
+REPORT_DIR=$ROOT_PATH/report
+BENCHMARK_REPORT_DIR=$REPORT_DIR/benchmark_op
+BENCHMARK_MODELS_FILE=$BENCHMARK_REPORT_DIR/benchmark_models.txt
+MODEL_TEST_ROOT_PATH=$INSTALL_PATH/test/models/tflite
+BENCHMARK_MODEL_LIST=
+BACKEND_LIST="acl_cl acl_neon cpu"
+TEST_DIRS="."
+
+# Print the command-line help for this script.
+# The default shown for --backends is the current value of $BACKEND_LIST.
+# Does not exit by itself; the caller decides the exit status.
+function Usage()
+{
+ echo "Usage: ${BASH_SOURCE[0]} [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --backends=STRING Backends to test. (default='$BACKEND_LIST')"
+ echo " --list=FILE List file to test. Test all if list option is not passed"
+}
+
+# Parse command-line options.
+# Arguments that match none of the patterns below are silently ignored
+# (there is no catch-all branch).
+for i in "$@"
+do
+ case $i in
+ -h|--help|help)
+ Usage
+ exit 1
+ ;;
+ --list=*)
+ TEST_LIST_PATH=${i#*=}
+ # Drop every line that contains '#' anywhere, then join the remaining
+ # lines into one space-separated list of directories
+ TEST_DIRS=$(grep -v '#' $TEST_LIST_PATH | tr '\n' ' ' )
+ ;;
+ --backends=*)
+ BACKEND_LIST=${i#*=}
+ ;;
+ esac
+ shift
+done
+
+# Build the list of operation tests to benchmark and store it in the global
+# BENCHMARK_MODEL_LIST.
+# Reads the globals MODEL_TEST_ROOT_PATH (search root) and TEST_DIRS
+# (space-separated directories, relative to that root).
+# Entries of TEST_DIRS that are not directories are skipped without a message.
+function get_benchmark_op_list()
+{
+ local TESTS_TO_RUN=""
+
+ pushd $MODEL_TEST_ROOT_PATH > /dev/null
+ for DIR in $TEST_DIRS; do
+ if [ -d "$DIR" ]; then
+ # A test is any directory holding a config.sh. The sed drops the "./" prefix
+ # find prints when DIR is "."; paths starting with MODELS/ are excluded.
+ TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | grep -v '^MODELS/' | sort)
+ TESTS_TO_RUN="$TESTS_TO_RUN $TESTS_FOUND"
+ fi
+ done
+ popd > /dev/null
+
+ BENCHMARK_MODEL_LIST=$(echo "${TESTS_TO_RUN}")
+}
+
+# Run one benchmark configuration for one model, print the result and store it.
+#   $1 : base name (without extension) of the log and result files
+#   $2 : label printed on the console and written to the result file
+#   $3 : model (test) name
+#   $4 : directory that receives the log and result files
+#   $5 : driver binary handed to get_result_of_benchmark_test
+# Writes "$2 <result>" to <$4>/<$1>.result; the run log goes to <$4>/<$1>.txt.
+function run_benchmark_and_print()
+{
+
+ local WRITE_FILE_NAME=$1
+ local MSG=$2
+ local MODEL=$3
+ local REPORT_MODEL_DIR=$4
+ local DRIVER_BIN=$5
+
+ # LOG_FILE, RESULT_FILE and RESULT are not declared local in this function
+ LOG_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.txt
+ RESULT_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.result
+ # $MSG is unquoted here, so a label containing spaces is split into several arguments
+ print_with_dots $MSG
+ RESULT=$(get_result_of_benchmark_test $DRIVER_BIN $MODEL $LOG_FILE)
+ echo "$RESULT ms"
+ echo "$MSG $RESULT" > $RESULT_FILE
+}
+
+# Benchmark one model with onert once per backend listed in BACKEND_LIST.
+#   $1 : model (test) name
+#   $2 : directory that receives the per-backend log and result files
+#   $3 : onert driver binary
+# Exports BACKENDS, EXECUTOR and OP_BACKEND_ALLOPS for the duration of the runs
+# and unsets them before returning.
+function run_onert_with_all_config()
+{
+ local MODEL=$1
+ local REPORT_MODEL_DIR=$2
+ local BENCHMARK_DRIVER_BIN=$3
+
+ # Run profiler BACKEND_CNT+1 times: on each run of the first BACKEND_CNT runs it will
+ # collect metrics for one unmeasured backend. On the last run metrics for data transfer
+ # NOTE(review): PROFILING_RUN_CNT is only incremented below and is not read anywhere
+ # in this function, and no profiler is invoked here; the comment above looks like a
+ # leftover - confirm before removing.
+ PROFILING_RUN_CNT=1
+ BACKENDS_TO_USE=
+ for backend in $BACKEND_LIST; do
+ # Build a ';'-terminated list such as "acl_cl;acl_neon;cpu;"
+ BACKENDS_TO_USE+=$backend';'
+ ((++PROFILING_RUN_CNT))
+ done
+ export BACKENDS=$BACKENDS_TO_USE
+ export EXECUTOR="Linear"
+ for backend in $BACKEND_LIST; do
+ export OP_BACKEND_ALLOPS=$backend
+ # ${backend^^} upper-cases the backend name for the printed label
+ run_benchmark_and_print "onert_$backend" "ONERT-${backend^^}"\
+ $MODEL $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN
+ done
+ unset EXECUTOR OP_BACKEND_ALLOPS BACKENDS
+}
+
+# Benchmark every model in BENCHMARK_MODEL_LIST: first with the TFLite driver,
+# then with onert on each configured backend.
+# Appends each benchmarked model name to $BENCHMARK_MODELS_FILE and creates one
+# report directory per model under $BENCHMARK_REPORT_DIR.
+function run_benchmark_test()
+{
+ # Declared local here; the functions called below assign to these names
+ # without declaring them
+ local LOG_FILE=
+ local RESULT_FILE=
+ local RESULT=
+ local REPORT_MODEL_DIR=
+
+ # Exported for the child processes started below; unset again at the end
+ export COUNT=5
+ echo
+ echo "============================================"
+ echo
+ date +'%Y-%m-%d %H:%M:%S %s'
+ echo
+ local i=0
+ for MODEL in $BENCHMARK_MODEL_LIST; do
+
+ # Default status; the sourced config.sh may override STATUS
+ STATUS="enabled"
+ source $MODEL_TEST_ROOT_PATH/$MODEL/config.sh
+
+ # Skip 'disabled' tests (STATUS is compared case-insensitively)
+ if [ $(tr '[:upper:]' '[:lower:]' <<< "$STATUS") == "disabled" ]; then
+ continue
+ fi
+
+ echo "Benchmark test `echo $MODEL`"
+ echo $MODEL >> $BENCHMARK_MODELS_FILE
+
+ REPORT_MODEL_DIR=$BENCHMARK_REPORT_DIR/$MODEL
+ mkdir -p $REPORT_MODEL_DIR
+
+ # TFLite+CPU
+ run_benchmark_and_print "tflite_cpu" "TFLite-CPU" $MODEL $REPORT_MODEL_DIR $TFLITE_DRIVER_BIN
+
+ # run onert
+ # Operation tests do not need to be run with each scheduler
+ run_onert_with_all_config $MODEL $REPORT_MODEL_DIR $ONERT_DRIVER_BIN
+
+ # Print a separating blank line unless i is the last index of the list.
+ # Skipped (disabled) models do not advance i.
+ if [[ $i -ne $(echo $BENCHMARK_MODEL_LIST | wc -w)-1 ]]; then
+ echo ""
+ fi
+ i=$((i+1))
+ done
+ echo "============================================"
+ unset COUNT
+}
+
+if [ ! -e "$BENCHMARK_REPORT_DIR" ]; then
+ mkdir -p $BENCHMARK_REPORT_DIR
+fi
+
+get_benchmark_op_list
+
+rm -rf $BENCHMARK_MODELS_FILE
+
+# Model download server setting
+prepare_test_model
+
+echo ""
+# print the result AND append to log file
+run_benchmark_test 2>&1 | tee -a $REPORT_DIR/onert_benchmarks.txt
+echo ""
+
+# Make json file.
+# functions to fill json with benchmark results
+source $MY_PATH/print_to_json.sh
+print_to_json $BENCHMARK_REPORT_DIR $REPORT_DIR "benchmark_op_result.json"
progname=$(basename "${BASH_SOURCE[0]}")
indir="."
outdir="."
-nnpkg_run=${nnpkg_run:-"nnpackage_run"}
+nnpkg_run=${nnpkg_run:-"onert_run"}
difftool=${difftool:-"h5diff"}
delete_dumped_on_failure=0
verbose_diff=0
echo " -v verbose result diff (default=$verbose_diff)"
echo ""
echo "Environment variables:"
- echo " nnpackage_run path to nnpackage_run (default=nnpackage_run)"
- echo " difftool path to i5diff or h5diff (default=h5diff)"
+ echo " onert_run path to onert_run (default=onert_run)"
+ echo " difftool path to i5diff or h5diff (default=h5diff)"
echo ""
echo "Examples:"
echo " $0 $progname Add_000 => run $indir/Add_000 and check output"
# See the License for the specific language governing permissions and
# limitations under the License.
-COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
-
+MODEL_ROOT_DIR=$DRIVER_PATH/models
MD5_CHECK="on"
function Usage()
echo ""
echo "Options:"
echo " --ignoremd5 Ignore MD5 check when download model files"
+ echo " --cachedir=PATH Set downloaded resouces cache directory (default: $CACHE_PATH)"
echo " -h, --help Display this help message and exit"
}
--ignoremd5)
MD5_CHECK="off"
;;
+ --cachedir=*)
+ CACHE_PATH=${i#*=}
+ ;;
*)
echo "Unknown option: $i"
exit 1
shift
done
+# Print, on stdout, the space-separated list of test directories (relative to
+# $MODEL_ROOT_DIR) that contain a config.sh.
+#   $@ : directories to search, relative to $MODEL_ROOT_DIR; "." when none are given
+# Directories that do not exist are reported on stderr and skipped.
+function find_tests()
+{
+ local TEST_DIRS="$@"
+ local TESTS_TO_DOWNLOAD=""
+
+ if [[ $# -eq 0 ]]; then
+ TEST_DIRS="."
+ fi
+
+ # Discard all positional parameters; they were already copied into TEST_DIRS
+ shift $#
+
+ pushd $MODEL_ROOT_DIR > /dev/null
+ for DIR in $TEST_DIRS; do
+ if [ -d "$DIR" ]; then
+ # A test is any directory holding a config.sh; the sed drops the "./" prefix
+ # that find prints when DIR is "."
+ TESTS_FOUND=$(find "$DIR" -type f -name 'config.sh' -exec dirname {} \;| sed 's|^./||' | sort)
+ TESTS_TO_DOWNLOAD="$TESTS_TO_DOWNLOAD $TESTS_FOUND"
+ else
+ echo "Test $DIR was not found. This test is not added." 1>&2
+ fi
+ done
+ popd > /dev/null
+
+ # Unquoted on purpose: newlines in the collected list collapse to single spaces
+ echo $TESTS_TO_DOWNLOAD
+}
+
+# Decide whether a model file has to be (re)downloaded into the cache.
+#   $1 : path of the cached copy
+#   $2 : URL of the remote file
+# Returns 0 (download needed) when the cached copy is missing or its MD5 differs
+# from the MD5 of the remote content; returns 1 when the cached copy can be kept.
+# The MD5 comparison is skipped when IGNORE_MD5 is "1" or MD5_CHECK is "off".
+function need_download()
+{
+  LOCAL_PATH=$1
+  REMOTE_URL=$2
+  # Quoted so that an empty path or a path with spaces stays a single operand;
+  # unquoted, an empty path made the test false and md5sum below read stdin.
+  if [ ! -e "$LOCAL_PATH" ]; then
+    return 0
+  fi
+  # Ignore checking md5 in cache
+  # TODO Use "--md5" option only and remove IGNORE_MD5 environment variable
+  if [ "${IGNORE_MD5:-}" == "1" ]; then
+    return 1
+  fi
+  if [ "$MD5_CHECK" = "off" ]; then
+    return 1
+  fi
+
+  # The remote hash is obtained by streaming the remote file through md5sum
+  LOCAL_HASH=$(md5sum "$LOCAL_PATH" | awk '{ print $1 }')
+  REMOTE_HASH=$(curl --netrc-optional -kLsS "$REMOTE_URL" | md5sum | awk '{ print $1 }')
+  # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
+  if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
+    echo "Downloaded file is outdated or incomplete."
+    return 0
+  fi
+  return 1
+}
+
+# Download the model file of every given test into $CACHE_PATH.
+#   $@ : test names, each a directory under $MODEL_ROOT_DIR that holds a config.sh
+# A file is fetched only when need_download reports that the cached copy is
+# missing or outdated; downloaded *.zip files are also unpacked next to the archive.
+function download_tests()
+{
+ SELECTED_TESTS=$@
+
+ echo ""
+ echo "Downloading tests:"
+ echo "======================"
+ for TEST_NAME in $SELECTED_TESTS; do
+ echo $TEST_NAME
+ done
+ echo "======================"
+
+ if [ ! -e $CACHE_PATH ]; then
+ mkdir -p $CACHE_PATH
+ fi
+
+ i=0
+ for TEST_NAME in $SELECTED_TESTS; do
+ # Reset the per-test settings, then let this test's config.sh fill them in
+ # NOTE(review): the counter i is incremented but not read in this function
+ ((i++))
+ MODELFILE_URL_BASE=""
+ MODELFILE_NAME=""
+ source $MODEL_ROOT_DIR/$TEST_NAME/config.sh
+
+ MODELFILE=$CACHE_PATH/$MODELFILE_NAME
+ MODELFILE_URL="$MODELFILE_URL_BASE/$MODELFILE_NAME"
+ # A non-empty MODELFILE_SERVER takes precedence over the URL base from config.sh
+ if [ -n "$MODELFILE_SERVER" ]; then
+ MODELFILE_URL="$MODELFILE_SERVER/$MODELFILE_NAME"
+ fi
+
+ # Download model file
+ # Download unless we have it in cache (Also check md5sum)
+ if need_download "$MODELFILE" "$MODELFILE_URL"; then
+ echo ""
+ echo "Download test file for $TEST_NAME"
+ echo "======================"
+
+ rm -f $MODELFILE # Remove invalid file if exists
+ # curl -O saves under the remote file name in the current directory,
+ # so run it from inside the cache directory
+ pushd $CACHE_PATH > /dev/null
+ echo "Download $MODELFILE_URL"
+ curl --netrc-optional -kLOsS $MODELFILE_URL
+ # Unpack archives into a directory named after the archive without ".zip"
+ if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
+ unzip -o $MODELFILE_NAME -d ${MODELFILE_NAME%.zip}
+ fi
+ popd > /dev/null
+ fi
+
+ done
+}
+
# Check MODELFILE_SERVER
if [[ -z "$MODELFILE_SERVER" ]]; then
echo "Fail to download models: Please set MODELFILE_SERVER to download model"
fi
echo "Download from $MODELFILE_SERVER"
-$INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK
+# Download tflite model
+TESTS_TO_DOWNLOAD=$(find_tests tflite)
+download_tests $TESTS_TO_DOWNLOAD
# See the License for the specific language governing permissions and
# limitations under the License.
-COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
UNITTEST_REPORT_DIR=
-UNITTEST_TEST_DIR=$INSTALL_DIR/unittest
+UNITTEST_TEST_DIR=$INSTALL_PATH/unittest
UNITTEST_RESULT=0
UNITTEST_RUN_ALL=""
# See the License for the specific language governing permissions and
# limitations under the License.
-COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
-
-MD5_CHECK="on"
-TFLITE_LOADER="loader"
REPORT_DIR="report"
TEST_LIST_FILE=
echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
echo ""
echo "Options:"
- echo " --ignoremd5 Ignore MD5 check when download model files"
- echo " --api=(nnapi|loader) TFLite model file loading API (default=$TFLITE_LOADER)"
echo " --reportdir=PATH Path to write report (default=$REPORT_DIR)"
echo " --list=FILE List file to test. Test all if list option is not passed"
+ echo " --cachedir=PATH Set downloaded resouces cache directory (default: $CACHE_PATH)"
}
for i in "$@"
Usage
exit 1
;;
- --ignoremd5)
- MD5_CHECK="off"
- ;;
- --api=*)
- TFLITE_LOADER=${i#*=}
- ;;
--reportdir=*)
REPORT_DIR=${i#*=}
;;
--list=*)
TEST_LIST_FILE=${i#*=}
;;
+ --cachedir=*)
+ CACHE_PATH=${i#*=}
+ ;;
*)
echo "Unknown option: $i"
exit 1
TEST_RESULT=0
TAP_NAME=verification_test.tap
-TEST_NAME="Verification"
-TEST_DRIVER=
-
-if [[ $TFLITE_LOADER == "nnapi" ]]; then
- TEST_NAME="NNAPI Verification"
- TEST_DRIVER=nnapi_test
-elif [[ $TFLITE_LOADER == "loader" ]]; then
- TEST_NAME="Loader Verification"
- TEST_DRIVER=tflite_comparator
-else
- Usage
- exit 1
-fi
+TEST_NAME="Loader Verification"
+TEST_DRIVER=tflite_comparator
-$INSTALL_DIR/test/models/run_test.sh --driverbin=$TEST_DRIVER \
- --download=off --run=on \
+$INSTALL_PATH/test/models/run_test.sh --driverbin=$TEST_DRIVER \
--reportdir=$REPORT_DIR \
--tapname=$TAP_NAME \
+ --cachedir=$CACHE_PATH \
${MODELLIST:-} > $REPORT_DIR/verification_test.log 2>&1
TEST_RESULT=$?
# limitations under the License.
MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+ROOT_PATH="$(cd ${MY_PATH}/../../ && pwd)"
+
+# Install path on CI
+INSTALL_PATH=$ROOT_PATH/Product/out
+TEST_CACHE_PATH=$INSTALL_PATH/test/cache
+
+# Report which model server is configured, then run the "prepare-model" command
+# of $INSTALL_PATH/test/onert-test with $TEST_CACHE_PATH as its cache directory.
+# An unset or empty MODELFILE_SERVER only produces a notice; the command is run
+# either way.
+function prepare_test_model()
+{
+ # Model download server setting
+ if [[ -z "${MODELFILE_SERVER}" ]]; then
+ echo "Model file server is not set. Try to use default setting."
+ else
+ echo "Model Server: ${MODELFILE_SERVER}"
+ fi
+ $INSTALL_PATH/test/onert-test prepare-model --cachedir=$TEST_CACHE_PATH
+}
function get_result_of_benchmark_test()
{
local LOG_FILE=$3
local RET=0
- $MY_PATH/models/run_test.sh --driverbin="$DRIVER_BIN -r 5 -w 3" $MODEL > $LOG_FILE 2>&1
+ $INSTALL_PATH/test/models/run_test.sh --driverbin="$DRIVER_BIN -r 5 -w 3" --cachedir=$TEST_CACHE_PATH $MODEL > $LOG_FILE 2>&1
RET=$?
if [[ $RET -ne 0 ]]; then
echo "Testing $MODEL aborted... exit code: $RET"
echo "$RESULT"
}
-function print_result_of_benchmark_test()
-{
- local NAME=$1
- local RESULT=$2
- local RESULT_FILE=$3
-
- echo "$NAME $RESULT" > $RESULT_FILE
-}
-
function print_with_dots()
{
PRINT_WIDTH=45
printf '%s' "$MSG"
printf '%*.*s ' 0 $padlength "$pad"
}
-
-
-function run_benchmark_and_print()
-{
- local WRITE_FILE_NAME=$1
- local MSG=$2
- local MODEL=$3
- local REPORT_MODEL_DIR=$4
- local PAUSE_TIME_IN_SEC=$5
- local DRIVER_BIN=$6
- local BENCHMARK_RUN_TEST_SH=$7
-
- LOG_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.txt
- RESULT_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.result
- print_with_dots $MSG
- RESULT=$(get_result_of_benchmark_test $DRIVER_BIN $MODEL $LOG_FILE)
- echo "$RESULT ms"
- print_result_of_benchmark_test "$MSG" "$RESULT" $RESULT_FILE
- sleep $PAUSE_TIME_IN_SEC
-}
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-embedding_lookup
-exp
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-max
-max_pool_2d
-mean
-min
-mul
-neg
-pack
-pad
-reduce_max
-reduce_mean
-reduce_sum/float
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-space_to_depth
-squeeze
-strided_slice
-sub
-tanh
-topk_v2
-transpose
-transpose_conv
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-batch_to_space_nd2
-cast
-concat
-conv_2d
-depthwise_conv_2d
-div
-embedding_lookup
-exp
-floor
-fullyconnected
-gather
-hashtable_lookup
-l2_normalization
-l2_pool_2d
-max
-max_pool_2d
-mean
-min
-mul
-neg
-pack
-pad
-reduce_max
-reduce_mean
-reduce_sum/float
-relu
-relu6
-reshape
-resize_bilinear
-rsqrt
-slice
-softmax
-space_to_batch_nd2
-space_to_depth
-squeeze
-strided_slice
-sub
-tanh
-topk_v2
-transpose
-transpose_conv
+++ /dev/null
-Add_000.opt
-#ArgMax_000.opt
-#ArgMax_001.opt
-#ArgMax_002.opt
-#ArgMax_003.opt
-AveragePool2D_000.opt
-#AveragePool2D_U8_000.opt
-Concatenation_000.opt
-Conv2D_000.opt
-Conv2D_001.opt
-Conv2D_002.opt
-Conv2D_004.opt
-DepthwiseConv2D_000.opt
-DepthwiseConv2D_002.opt
-FullyConnected_000.opt
-FullyConnected_001.opt
-FullyConnected_003.opt
-#L2Normalize_U8_000.opt
-Logistic_000.opt
-#Logistic_U8_000.opt
-MaxPool2D_000.opt
-#Mean_000.opt
-#Mean_001.opt
-Mul_000.opt
-#Net_TConv_BN_000.opt
-#Net_UnpackAdd_001.opt
-Pad_000.opt
-Quantization_000.opt
-Reshape_000.opt
-Reshape_001.opt
-Reshape_002.opt
-Softmax_000.opt
-#SpaceToDepth_U8_000.opt
-#Split_000.opt
-#Tanh_U8_000.opt
-#TransposeConv_000.opt
-#TransposeConv_001.opt
-#Transpose_000.opt
-#Unpack_000.opt
-#Unpack_001.opt
-#Unpack_002.opt
-#Unpack_003.opt
conv_2d
depthwise_conv_2d
div
+embedding_lookup
exp
floor
-fullyconnected
+fullyconnected/fc1
+fullyconnected/matmul2x2
+fullyconnected/weights_as_input
gather
+hashtable_lookup
l2_normalization
max
max_pool_2d
conv_2d
depthwise_conv_2d
div
+embedding_lookup
floor
gather
+hashtable_lookup
l2_normalization
logistic
max
conv_2d
depthwise_conv_2d
div
+embedding_lookup
exp
floor
-fullyconnected
+fullyconnected/fc1
+fullyconnected/matmul2x2
+fullyconnected/weights_as_input
gather
+hashtable_lookup
l2_normalization
max
max_pool_2d
conv_2d
depthwise_conv_2d
div
+embedding_lookup
floor
fullyconnected
gather
+hashtable_lookup
l2_normalization
logistic
max
MODELS/inception_module
MODELS/mobilenet
-MODELS/mobilenet_quant8
abs
add
average_pool_2d
+++ /dev/null
-MODELS/inception_module
-MODELS/mobilenet
-add
-average_pool_2d
-concat
-conv_2d
-depthwise_conv_2d
-fullyconnected/fc1
-logistic
-max_pool_2d
-pad
-relu
-relu6
-reshape/reshape1
-softmax
-tanh
MODELS/inception_module
MODELS/mobilenet
-MODELS/mobilenet_quant8
add
average_pool_2d
concat
-#!/usr/bin/env python
+#!/usr/bin/env python3
#
# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
#
function Usage()
{
- echo "Usage: ./$0 --driverbin={such as tflite_run} {tests to test or empty for all of tests}"
- echo "Usage: ./$0 --driverbin=Product/out/bin/tflite_run --reportdir=report --tapname=verification.tap avgpool1 avgpool2"
+ echo "Usage: ${BASH_SOURCE[0]} [OPTIONS] {tests to test or empty for all of tests}"
+ echo "(Ex) : ${BASH_SOURCE[0]} --driverbin=Product/out/bin/onert_run --reportdir=report --tapname=verification.tap avgpool1 avgpool2"
echo ""
- echo "--download - (default=on) Download model files"
- echo "--run - (default=on) Test model files"
- echo "--driverbin - (default=../../Product/out/bin/tflite_run) Runner for runnning model tests"
- echo "--reportdir - (default=report) Directory to place tap files"
- echo "--tapname - (default=framework_test.tap) File name to be written for tap"
- echo "--md5 - (default=on) MD5 check when download model files"
- echo "--configdir - (default=$TEST_ROOT_PATH) Config directory to download and test model"
- echo "--cachedir - (default=$CACHE_ROOT_PATH) Directory to download model"
+ echo "--driverbin : Runner for runnning model tests"
+ echo "--reportdir : (default=$REPORT_DIR) Directory to place tap files"
+ echo "--tapname : (default=$TAP_NAME) File name to be written for tap"
+ echo "--configdir : (default=$TEST_ROOT_PATH) Config directory to download and test model"
+ echo "--cachedir : (default=$CACHE_ROOT_PATH) Directory to download model"
echo ""
}
-function need_download()
-{
- LOCAL_PATH=$1
- REMOTE_URL=$2
- if [ ! -e $LOCAL_PATH ]; then
- return 0;
- fi
- # Ignore checking md5 in cache
- # TODO Use "--md5" option only and remove IGNORE_MD5 environment variable
- if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
- return 1
- fi
- if [ "$MD5_CHECK" = "off" ]; then
- return 1
- fi
-
- LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
- REMOTE_HASH=$(curl -ss $REMOTE_URL | md5sum | awk '{ print $1 }')
- # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
- if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
- echo "Downloaded file is outdated or incomplete."
- return 0
- fi
- return 1
-}
-
-DRIVER_BIN=""
+DRIVER_BIN="$NNFW_HOME/Product/out/bin/onert_run"
TAP_NAME="framework_test.tap"
TEST_LIST=()
-DOWNLOAD_MODEL="on"
-RUN_TEST="on"
-MD5_CHECK="on"
# Support environment variable setting for mirror server
FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
--tapname=*)
TAP_NAME=${i#*=}
;;
- --download=*)
- DOWNLOAD_MODEL=${i#*=}
- ;;
- --md5=*)
- MD5_CHECK=${i#*=}
- ;;
- --run=*)
- RUN_TEST=${i#*=}
- ;;
--configdir=*)
TEST_ROOT_PATH=${i#*=}
;;
RUN_DISABLED="false"
fi
-if [ ! -n "$DRIVER_BIN" ]; then
- DRIVER_BIN="$NNFW_HOME/Product/out/bin/tflite_run"
-fi
-
if [ ! -d "$TEST_ROOT_PATH" ]; then
echo "Cannot find config directory for test: please set proper configdir"
exit 1
fi
# Check test driver setting
-if ! command_exists $DRIVER_BIN && [ "$RUN_TEST" = "on" ]; then
+if ! command_exists $DRIVER_BIN ; then
echo "Cannot find test driver" $DRIVER_BIN ": please set proper DRIVER_BIN"
exit 1
fi
return $TOTAL_RESULT
}
-download_tests()
-{
- SELECTED_TESTS=$@
-
- echo ""
- echo "Downloading tests:"
- echo "======================"
- for TEST_NAME in $SELECTED_TESTS; do
- echo $TEST_NAME
- done
- echo "======================"
-
- i=0
- for TEST_NAME in $SELECTED_TESTS; do
- # Test configure initialization
- ((i++))
- MODELFILE_URL_BASE=""
- MODELFILE_NAME=""
- source $TEST_ROOT_PATH/$TEST_NAME/config.sh
-
- MODELFILE=$CACHE_ROOT_PATH/$MODELFILE_NAME
- MODELFILE_URL="$MODELFILE_URL_BASE/$MODELFILE_NAME"
- if [ -n "$FIXED_MODELFILE_SERVER" ]; then
- MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
- fi
-
- # Download model file
- if [ ! -e $CACHE_ROOT_PATH ]; then
- mkdir -p $CACHE_ROOT_PATH
- fi
-
- # Download unless we have it in cache (Also check md5sum)
- if need_download "$MODELFILE" "$MODELFILE_URL"; then
- echo ""
- echo "Download test file for $TEST_NAME"
- echo "======================"
-
- rm -f $MODELFILE # Remove invalid file if exists
- pushd $CACHE_ROOT_PATH
- wget -nv $MODELFILE_URL
- if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
- unzip -o $MODELFILE_NAME -d ${MODELFILE_NAME%.zip}
- fi
- popd
- fi
-
- done
-}
-
-
find_tests()
{
local TEST_DIRS="$@"
mkdir -p $REPORT_DIR
TESTS_TO_RUN=$(find_tests ${TEST_LIST[@]})
+run_tests $TESTS_TO_RUN
-if [ "$DOWNLOAD_MODEL" = "on" ]; then
- download_tests $TESTS_TO_RUN
-fi
-
-if [ "$RUN_TEST" = "on" ]; then
- run_tests $TESTS_TO_RUN
-fi
exit $?
[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
+# Common variables which are used by test commands
+# DRIVER_PATH: folder containing the test driver and related resources
+# INSTALL_PATH: folder where the test package is installed
+# CACHE_PATH: cache folder for downloaded test resources
DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INSTALL_PATH="$(dirname $DRIVER_PATH)"
-COMMAND_PATH=$INSTALL_PATH/test/command
-BIN_PATH=$INSTALL_PATH/bin
+CACHE_PATH=$DRIVER_PATH/cache
-export PATH=$BIN_PATH:$PATH
+export PATH=$INSTALL_PATH/bin:$PATH
+
+COMMAND_PATH=$DRIVER_PATH/command
function Usage()
{
+++ /dev/null
-#!/bin/bash
-#
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-# NOTE: Supposed that this script would be executed with an artifact path.
-# The artifact path has tests/(test suite) and Product/
-# Reference this PR(https://github.sec.samsung.net/STAR/nnfw/pull/375).
-
-function Usage()
-{
- echo "Usage: ./$0 --artifactpath=. # run all tests"
- echo "Usage: ./$0 --artifactpath=/home/dragon/nnfw --frameworktest --verification --benchmark # run fw test & verfication and benchmark"
- echo ""
- echo "--artifactpath - (default={test-driver.sh's path}/../../) it should contain tests/ and Product/"
- echo ""
- echo "Following options are needed when you want to tests of specific types. If you don't pass any one, unittest and verification will be run"
- echo "--frameworktest - (default=off) run framework test"
- echo "--verification - (default=on) run verification"
- echo "--frameworktest_list_file - filepath of model list for test"
- echo ""
- echo "Following option is only needed when you want to test benchmark."
- echo "--benchmark_onert_op - (default=off) run benchmark per operation on onert"
- echo ""
- echo "etc."
- echo "--framework_driverbin - (default=../../Product/out/bin/tflite_run) runner for runnning framework tests"
- echo "--verification_driverbin - (default=../../Product/out/bin/nnapi_test) runner for runnning verification tests"
- echo ""
- echo "--reportdir - (default=\$ARTIFACT_PATH/report) directory to save report"
- echo ""
-}
-
-TEST_DRIVER_DIR="$( cd "$( dirname "${BASH_SOURCE}" )" && pwd )"
-ARTIFACT_PATH="$TEST_DRIVER_DIR/../../"
-FRAMEWORK_DRIVER_BIN=""
-VERIFICATION_DRIVER_BIN=""
-ALLTEST_ON="true"
-FRAMEWORKTEST_ON="false"
-VERIFICATION_ON="false"
-BENCHMARK_ONERT_OP_ON="false"
-REPORT_DIR=""
-
-for i in "$@"
-do
- case $i in
- -h|--help|help)
- Usage
- exit 1
- ;;
- --artifactpath=*)
- ARTIFACT_PATH=${i#*=}
- ;;
- --framework_driverbin=*)
- FRAMEWORK_DRIVER_BIN=${i#*=}
- ;;
- --verification_driverbin=*)
- VERIFICATION_DRIVER_BIN=${i#*=}
- ;;
- --frameworktest)
- ALLTEST_ON="false"
- FRAMEWORKTEST_ON="true"
- ;;
- --frameworktest_list_file=*)
- FRAMEWORKTEST_LIST_FILE=$PWD/${i#*=}
- if [ ! -e "$FRAMEWORKTEST_LIST_FILE" ]; then
- echo "Pass on with proper frameworktest_list_file"
- exit 1
- fi
- ;;
- --verification)
- ALLTEST_ON="false"
- VERIFICATION_ON="true"
- ;;
- --benchmark_onert_op)
- ALLTEST_ON="false"
- BENCHMARK_ONERT_OP_ON="true"
- ;;
- --reportdir=*)
- REPORT_DIR=${i#*=}
- ;;
- *)
- # Be careful that others params are handled as $ARTIFACT_PATH
- ARTIFACT_PATH="$i"
- ;;
- esac
- shift
-done
-
-ARTIFACT_PATH="$(readlink -f $ARTIFACT_PATH)"
-
-if [ -z "$UNIT_TEST_DIR" ]; then
- UNIT_TEST_DIR=$ARTIFACT_PATH/Product/out/unittest
-fi
-
-if [ -z "$REPORT_DIR" ]; then
- REPORT_DIR=$ARTIFACT_PATH/report
-fi
-
-source $TEST_DRIVER_DIR/common.sh
-
-# Run tflite_run with various tflite models
-if [ "$FRAMEWORKTEST_ON" == "true" ]; then
- if [ -z "$FRAMEWORK_DRIVER_BIN" ]; then
- FRAMEWORK_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
- fi
-
- $TEST_DRIVER_DIR/test_framework.sh \
- --driverbin=$FRAMEWORK_DRIVER_BIN \
- --reportdir=$REPORT_DIR \
- --tapname=framework_test.tap \
- --logname=framework_test.log \
- --testname="Frameworktest" \
- --frameworktest_list_file=${FRAMEWORKTEST_LIST_FILE:-}
-fi
-
-# Run nnapi_test with various tflite models
-if [ "$ALLTEST_ON" == "true" ] || [ "$VERIFICATION_ON" == "true" ]; then
- if [ -z "$VERIFICATION_DRIVER_BIN" ]; then
- VERIFICATION_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/nnapi_test
- fi
-
- # verification uses the same script as frameworktest does
- $TEST_DRIVER_DIR/test_framework.sh \
- --driverbin=$VERIFICATION_DRIVER_BIN \
- --reportdir=$REPORT_DIR \
- --tapname=verification_test.tap \
- --logname=verification_test.log \
- --testname="Verification" \
- --frameworktest_list_file=${FRAMEWORKTEST_LIST_FILE:-}
-fi
-
-if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
- DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
-
- $TEST_DRIVER_DIR/benchmark_nnapi.sh \
- --test_op \
- --driverbin=$DRIVER_BIN \
- --reportdir=$REPORT_DIR/benchmark_op \
- --modelfilepath=$ARTIFACT_PATH/tests/scripts/models
-fi
-
-# Make json file. Actually, this process is only needed on CI. That's why it is in test-driver.sh.
-if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
- # functions to fill json with benchmark results
- source $ARTIFACT_PATH/tests/scripts/print_to_json.sh
- if [ "$BENCHMARK_ONERT_OP_ON" == "true" ]; then
- print_to_json $REPORT_DIR/benchmark_op $REPORT_DIR "benchmark_op_result.json"
- else
- print_to_json $REPORT_DIR/benchmark $REPORT_DIR "benchmark_result.json"
- fi
-fi
+++ /dev/null
-#!/bin/bash
-#
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-FWTEST_DRIVER_BIN=
-FWTEST_REPORT_DIR=
-FWTEST_TAP_NAME=
-FWTEST_LOG_NAME=
-FWTEST_TEST_NAME=
-
-function Usage()
-{
- echo "Usage Example:"
- echo "./$0 \\"
- echo " --driverbin=Product/out/bin/tflite_run \\ # Test driver path"
- echo " --frameworktest_list_file=tests/scripts/list/frameworktest_list.armv7l.cpu.txt \\"
- echo " --reportdir=report \\ # Directory for the report files will be saved"
- echo " --tapname=framework_test.tap \\ # Tap file name"
- echo " --logname=framework_test.log \\ # Log file name"
- echo " --testname=Frameworktest # Name of the test just a label of tests"
-
- exit 1
-}
-
-for i in "$@"
-do
- case $i in
- -h|--help|help)
- Usage
- ;;
- --driverbin=*)
- FWTEST_DRIVER_BIN=${i#*=}
- ;;
- --reportdir=*)
- FWTEST_REPORT_DIR=${i#*=}
- ;;
- --tapname=*)
- FWTEST_TAP_NAME=${i#*=}
- ;;
- --logname=*)
- FWTEST_LOG_NAME=${i#*=}
- ;;
- --testname=*)
- FWTEST_TEST_NAME=${i#*=}
- ;;
- --frameworktest_list_file=*)
- FRAMEWORKTEST_LIST_FILE=${i#*=}
- ;;
- esac
- shift
-done
-
-[ ! -z "$FWTEST_DRIVER_BIN" ] || Usage
-[ ! -z "$FWTEST_REPORT_DIR" ] || Usage
-[ ! -z "$FWTEST_TAP_NAME" ] || Usage
-[ ! -z "$FWTEST_LOG_NAME" ] || Usage
-[ ! -z "$FWTEST_TEST_NAME" ] || Usage
-
-if [ ! -e "$FWTEST_REPORT_DIR" ]; then
- mkdir -p $FWTEST_REPORT_DIR
-fi
-
-echo ""
-echo "============================================"
-echo "$FWTEST_TEST_NAME with $(basename $FWTEST_DRIVER_BIN) ..."
-
-if [ ! -z "$FRAMEWORKTEST_LIST_FILE" ]; then
- MODELLIST=$(cat "${FRAMEWORKTEST_LIST_FILE}")
-fi
-
-$MY_PATH/models/run_test.sh --driverbin=$FWTEST_DRIVER_BIN \
- --reportdir=$FWTEST_REPORT_DIR \
- --tapname=$FWTEST_TAP_NAME \
- ${MODELLIST:-} \
- > $FWTEST_REPORT_DIR/$FWTEST_LOG_NAME 2>&1
-FWTEST_RESULT=$?
-if [[ $FWTEST_RESULT -ne 0 ]]; then
- echo ""
- cat $FWTEST_REPORT_DIR/$FWTEST_TAP_NAME
- echo ""
- echo "$FWTEST_TEST_NAME failed... exit code: $FWTEST_RESULT"
- echo "============================================"
- echo ""
- exit $FWTEST_RESULT
-fi
-
-echo ""
-cat $FWTEST_REPORT_DIR/$FWTEST_TAP_NAME
-echo "============================================"
-echo ""
#!/bin/bash
+set -eo pipefail
+
MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
source $MY_PATH/common.sh
# Run profiler BACKEND_CNT+1 times: on each run of the first BACKEND_CNT runs it will
# collect metrics for one unmeasured backend. On the last run metrics for data transfer
PROFILING_RUN_CNT=$((BACKEND_CNT+1))
-TEST_DRIVER_DIR="$( cd "$( dirname "${BASH_SOURCE}" )" && pwd )"
-ARTIFACT_PATH="$TEST_DRIVER_DIR/../.."
-BENCHMARK_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
+ARTIFACT_PATH="$MY_PATH/../.."
+BENCHMARK_DRIVER_BIN=$INSTALL_PATH/bin/onert_run
REPORT_DIR=$ARTIFACT_PATH/report
-RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/models/run_test.sh
+RUN_TEST_SH=$INSTALL_PATH/test/models/run_test.sh
BENCHMARK_MODEL_LIST="MODELS/inception_nonslim MODELS/inception_slim MODELS/mobilenet"
if [ ! -e "$RUN_TEST_SH" ]; then
exit 1
fi
-
BENCHMARK_REPORT_DIR=$REPORT_DIR/benchmark
BENCHMARK_MODELS_FILE=$BENCHMARK_REPORT_DIR/benchmark_models.txt
+# Clean up report files
+rm -rf $BENCHMARK_REPORT_DIR
+rm -f $BENCHMARK_MODELS_FILE
+mkdir -p $BENCHMARK_REPORT_DIR
+touch $BENCHMARK_MODELS_FILE
+
+# Prepare models
+prepare_test_model
+
function run_without_sched()
{
local RESULT_SCH_INT=$1
RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
- printf -v RESULT_INT '%d' $RESULT 2>/dev/null
+ printf -v RESULT_INT '%.0f' $RESULT
PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
echo "$RESULT ms. Parallel scheduler is $PERCENTAGE% faster"
}
export COUNT=5
echo "============================================"
local i=0
- export USE_NNAPI=1
export BACKENDS="acl_cl;acl_neon;cpu"
# Remove metrics so that profiler can get metrics for operations
# with input&output sizes the same as the model
- rm "exec_time.json" 2>/dev/null
+ rm -f "exec_time.json" 2>/dev/null
for MODEL in $BENCHMARK_MODEL_LIST; do
echo "Benchmark test with `basename $BENCHMARK_DRIVER_BIN` & `echo $MODEL`"
print_with_dots "Profiling run #$j out of $PROFILING_RUN_CNT"
- $RUN_TEST_SH --driverbin=$BENCHMARK_DRIVER_BIN $MODEL > $LOG_FILE 2>&1
+ RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
RET=$?
if [[ $RET -ne 0 ]]; then
echo "Profiling $MODEL aborted in run#$j... exit code: $RET"
done
unset ONERT_LOG_ENABLE
-
##################################################################################
# Turn off profiling
##################################################################################
RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
echo "$RESULT ms"
- printf -v RESULT_SCH_INT '%d' $RESULT 2>/dev/null
+ printf -v RESULT_SCH_INT '%.0f' $RESULT
mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_parallel.dot"
RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
- printf -v RESULT_INT '%d' $RESULT 2>/dev/null
+ printf -v RESULT_INT '%.0f' $RESULT
PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
echo "$RESULT ms. Parallel scheduler is $PERCENTAGE% faster"
unset BACKENDS
echo "============================================"
unset COUNT
- unset USE_NNAPI
}
echo ""
+++ /dev/null
-if(NOT BUILD_NNAPI_TEST)
- return()
-endif(NOT BUILD_NNAPI_TEST)
-
-list(APPEND SOURCES "src/nnapi_test.cc")
-list(APPEND SOURCES "src/args.cc")
-
-nnfw_find_package(Boost REQUIRED program_options)
-
-add_executable(nnapi_test ${SOURCES})
-target_include_directories(nnapi_test PRIVATE ${Boost_INCLUDE_DIRS})
-target_link_libraries(nnapi_test nnfw_lib_tflite)
-target_link_libraries(nnapi_test ${Boost_PROGRAM_OPTIONS_LIBRARY})
-install(TARGETS nnapi_test DESTINATION bin)
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <iostream>
-
-namespace nnapi_test
-{
-
-Args::Args(const int argc, char **argv)
-{
- Initialize();
- try
- {
- Parse(argc, argv);
- }
- catch (const std::exception &e)
- {
- std::cerr << "The argments that cannot be parsed: " << e.what() << '\n';
- print(argv);
- exit(255);
- }
-}
-
-void Args::print(char **argv)
-{
- std::cout << "nnapi_test\n\n";
- std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n";
- std::cout << _options;
- std::cout << "\n";
-}
-
-void Args::Initialize(void)
-{
- // General options
- po::options_description general("General options", 100);
-
- // clang-format off
- general.add_options()
- ("help,h", "Print available options")
- ("tflite", po::value<std::string>()->required())
- ("seed", po::value<int>()->default_value(0), "The seed of random inputs")
- ("num_runs", po::value<int>()->default_value(2), "The number of runs")
- ;
- // clang-format on
-
- _options.add(general);
- _positional.add("tflite", 1);
- _positional.add("seed", 2);
-}
-
-void Args::Parse(const int argc, char **argv)
-{
- po::variables_map vm;
- po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
- vm);
-
- if (vm.count("help"))
- {
- print(argv);
-
- exit(0);
- }
-
- po::notify(vm);
- if (vm.count("tflite"))
- {
- _tflite_filename = vm["tflite"].as<std::string>();
-
- if (_tflite_filename.empty())
- {
- std::cerr << "Please specify tflite file.\n";
- print(argv);
- exit(255);
- }
- else
- {
- if (access(_tflite_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "tflite file not found: " << _tflite_filename << "\n";
- exit(255);
- }
- }
- }
-
- if (vm.count("seed"))
- {
- _seed = vm["seed"].as<int>();
- }
-
- if (vm.count("num_runs"))
- {
- _num_runs = vm["num_runs"].as<int>();
- if (_num_runs < 0)
- {
- std::cerr << "num_runs value must be greater than 0.\n";
- exit(255);
- }
- }
-}
-
-} // end of namespace nnapi_test
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNAPI_TEST_ARGS_H__
-#define __NNAPI_TEST_ARGS_H__
-
-#include <boost/program_options.hpp>
-#include <string>
-
-namespace po = boost::program_options;
-
-namespace nnapi_test
-{
-
-class Args
-{
-public:
- Args(const int argc, char **argv);
- void print(char **argv);
-
- const std::string &getTfliteFilename(void) const { return _tflite_filename; }
- const int getSeed(void) const { return _seed; }
- const int getNumRuns(void) const { return _num_runs; }
-
-private:
- void Initialize();
- void Parse(const int argc, char **argv);
-
-private:
- po::positional_options_description _positional;
- po::options_description _options;
-
- std::string _tflite_filename;
- int _seed;
- int _num_runs;
-};
-
-} // end of namespace nnapi_test
-
-#endif // __NNAPI_TEST_ARGS_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tensorflow/lite/model.h"
-
-#include "tflite/interp/FlatBufferBuilder.h"
-#include "tflite/RandomTestRunner.h"
-
-#include <iostream>
-#include <stdexcept>
-
-#include "args.h"
-
-using namespace tflite;
-using namespace nnfw::tflite;
-using namespace nnapi_test;
-
-int main(const int argc, char **argv)
-{
- Args args(argc, argv);
-
- const auto filename = args.getTfliteFilename();
-
- StderrReporter error_reporter;
-
- auto model = FlatBufferModel::BuildFromFile(filename.c_str(), &error_reporter);
-
- if (model == nullptr)
- {
- // error_reporter must have shown the error message already
- return 1;
- }
-
- const nnfw::tflite::FlatBufferBuilder builder(*model);
-
- try
- {
- const auto seed = static_cast<uint32_t>(args.getSeed());
- auto runner = nnfw::tflite::RandomTestRunner::make(seed);
- const auto num_runs = static_cast<size_t>(args.getNumRuns());
- runner.compile(builder);
- return runner.run(num_runs);
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << std::endl;
- return 1;
- }
-}
+++ /dev/null
-if(NOT BUILD_NNPACKAGE_RUN)
- return()
-endif(NOT BUILD_NNPACKAGE_RUN)
-
-if(NOT BUILD_ONERT)
- return()
-endif(NOT BUILD_ONERT)
-
-list(APPEND NNPACKAGE_RUN_SRCS "src/nnpackage_run.cc")
-list(APPEND NNPACKAGE_RUN_SRCS "src/args.cc")
-list(APPEND NNPACKAGE_RUN_SRCS "src/nnfw_util.cc")
-list(APPEND NNPACKAGE_RUN_SRCS "src/randomgen.cc")
-list(APPEND NNPACKAGE_RUN_SRCS "src/rawformatter.cc")
-
-nnfw_find_package(Boost REQUIRED program_options)
-nnfw_find_package(Ruy QUIET)
-nnfw_find_package(HDF5 QUIET)
-
-if (HDF5_FOUND)
- list(APPEND NNPACKAGE_RUN_SRCS "src/h5formatter.cc")
-endif()
-
-add_executable(nnpackage_run ${NNPACKAGE_RUN_SRCS})
-
-if (HDF5_FOUND)
- target_compile_definitions(nnpackage_run PRIVATE ONERT_HAVE_HDF5=1)
- target_include_directories(nnpackage_run PRIVATE ${HDF5_INCLUDE_DIRS})
- target_link_libraries(nnpackage_run ${HDF5_CXX_LIBRARIES})
-else()
- message(WARNING "HDF5 NOT found. Install libhdf5-dev or set EXT_HDF5_DIR to support load/dump in nnpackage_run.")
-endif(HDF5_FOUND)
-
-target_include_directories(nnpackage_run PRIVATE src)
-target_include_directories(nnpackage_run PRIVATE ${Boost_INCLUDE_DIRS})
-
-target_link_libraries(nnpackage_run nnfw_lib_tflite jsoncpp)
-target_link_libraries(nnpackage_run nnfw-dev)
-target_link_libraries(nnpackage_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
-target_link_libraries(nnpackage_run nnfw_lib_benchmark)
-if(Ruy_FOUND AND PROFILE_RUY)
- target_link_libraries(nnpackage_run ruy_instrumentation)
- target_link_libraries(nnpackage_run ruy_profiler)
-endif(Ruy_FOUND AND PROFILE_RUY)
-
-install(TARGETS nnpackage_run DESTINATION bin)
+++ /dev/null
-# nnpackage_run
-
-`nnpackage_run` is a tool to run `nnpackage`.
-
-It takes `nnpackage` as input. It uses **runtime API** internally.
-
-## Usage
-
-### Simple run
-
-This will run with random input data
-
-```
-$ ./nnpackage_run path_to_nnpackage_directory
-```
-
-Output would look like:
-
-```
-nnfw_prepare takes 425.235 ms
-nnfw_run takes 2.525 ms
-```
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_ALLOCATION_H__
-#define __NNPACKAGE_RUN_ALLOCATION_H__
-
-#include <cstdlib>
-#include <cstdint>
-
-namespace nnpkg_run
-{
-class Allocation
-{
-public:
- Allocation() : data_(nullptr) {}
- ~Allocation() { free(data_); }
- void *data() const { return data_; }
- void *alloc(uint64_t sz) { return data_ = malloc(sz); }
-
-private:
- void *data_;
-};
-} // namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_ALLOCATION_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <functional>
-#include <iostream>
-#include <json/json.h>
-
-namespace
-{
-
-// This function parses a json object and returns as a vector of integers
-// For example,
-// [0, [1, 2, 3, 4], 3, 40, 4, []] in JSON
-// is converted to:
-// {
-// 0 -> [1, 2, 3, 4]
-// 3 -> 40
-// 4 -> []
-// } in std::unordered_map. Note that the value type is still Json::Value.
-std::unordered_map<uint32_t, Json::Value> argArrayToMap(const Json::Value &jsonval)
-{
- if (!jsonval.isArray() || (jsonval.size() % 2 != 0))
- {
- std::cerr << "JSON argument must be an even-sized array in JSON\n";
- exit(1);
- }
-
- std::unordered_map<uint32_t, Json::Value> ret;
- for (uint32_t i = 0; i < jsonval.size(); i += 2)
- {
- if (!jsonval[i].isUInt())
- {
- std::cerr << "Key values(values in even indices) must be unsigned integers\n";
- exit(1);
- }
- uint32_t key = jsonval[i].asUInt();
- Json::Value val = jsonval[i + 1];
- ret[key] = jsonval[i + 1];
- }
- return ret;
-}
-
-// param shape_str is a form of, e.g., "[1, [2, 3], 3, []]" or "h5"
-void handleShapeJsonParam(nnpkg_run::TensorShapeMap &shape_map, const std::string &shape_str)
-{
- Json::Value root;
- Json::Reader reader;
- if (!reader.parse(shape_str, root, false))
- {
- std::cerr << "Invalid JSON format for output_sizes \"" << shape_str << "\"\n";
- exit(1);
- }
-
- auto arg_map = argArrayToMap(root);
- for (auto &pair : arg_map)
- {
- uint32_t key = pair.first;
- Json::Value &shape_json = pair.second;
- if (!shape_json.isArray())
- {
- std::cerr << "All the values must be list: " << shape_str << "\n";
- exit(1);
- }
-
- std::vector<int> shape;
- for (auto &dim_json : shape_json)
- {
- if (!dim_json.isUInt())
- {
- std::cerr << "All the dims should be dim >= 0: " << shape_str << "\n";
- exit(1);
- }
-
- shape.emplace_back(dim_json.asUInt64());
- }
-
- shape_map[key] = shape;
- }
-}
-
-} // namespace
-
-namespace nnpkg_run
-{
-
-Args::Args(const int argc, char **argv)
-{
- Initialize();
- Parse(argc, argv);
-}
-
-void Args::Initialize(void)
-{
- auto process_nnpackage = [&](const std::string &package_filename) {
- _package_filename = package_filename;
-
- std::cerr << "Package Filename " << _package_filename << std::endl;
- if (_package_filename.empty())
- {
- // TODO Print usage instead of the below message
- std::cerr << "Please specify nnpackage file. Run with `--help` for usage."
- << "\n";
-
- exit(1);
- }
- else
- {
- if (access(_package_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "nnpackage not found: " << _package_filename << "\n";
- }
- }
- };
-
- auto process_output_sizes = [&](const std::string &output_sizes_json_str) {
- Json::Value root;
- Json::Reader reader;
- if (!reader.parse(output_sizes_json_str, root, false))
- {
- std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n";
- exit(1);
- }
-
- auto arg_map = argArrayToMap(root);
- for (auto &pair : arg_map)
- {
- uint32_t key = pair.first;
- Json::Value &val_json = pair.second;
- if (!val_json.isUInt())
- {
- std::cerr << "All the values in `output_sizes` must be unsigned integers\n";
- exit(1);
- }
- uint32_t val = val_json.asUInt();
- _output_sizes[key] = val;
- }
- };
-
- auto process_shape_prepare = [&](const std::string &shape_str) {
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- if (shape_str == "H5" || shape_str == "h5")
- {
- _when_to_use_h5_shape = WhenToUseH5Shape::PREPARE;
- return;
- }
-#endif
- try
- {
- handleShapeJsonParam(_shape_prepare, shape_str);
- }
- catch (const std::exception &e)
- {
- std::cerr << "error with '--shape_prepare' option: " << shape_str << std::endl;
- exit(1);
- }
- };
-
- auto process_shape_run = [&](const std::string &shape_str) {
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- if (shape_str == "H5" || shape_str == "h5")
- {
- _when_to_use_h5_shape = WhenToUseH5Shape::RUN;
- return;
- }
-#endif
- try
- {
- handleShapeJsonParam(_shape_run, shape_str);
- }
- catch (const std::exception &e)
- {
- std::cerr << "error with '--shape_run' option: " << shape_str << std::endl;
- exit(1);
- }
- };
-
- // General options
- po::options_description general("General options", 100);
-
- // clang-format off
- general.add_options()
- ("help,h", "Print available options")
- ("version", "Print version and exit immediately")
- ("nnpackage", po::value<std::string>()->required()->notifier(process_nnpackage))
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
- ("load,l", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_filename = v; }), "Input filename")
-#endif
- ("dump:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_raw_filename = v; }), "Raw Output filename")
- ("load:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_raw_filename = v; }), "Raw Input filename")
- ("output_sizes", po::value<std::string>()->notifier(process_output_sizes),
- "The output buffer size in JSON 1D array\n"
- "If not given, the model's output sizes are used\n"
- "e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n")
- ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
- ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
- ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(us) between runs (as default no delay")
- ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
- ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling")
- ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }),
- "Write report\n"
- "{exec}-{nnpkg}-{backend}.csv will be generated.\n"
- "e.g. nnpackage_run-UNIT_Add_000-acl_cl.csv.\n"
- "{nnpkg} name may be changed to realpath if you use symbolic-link.")
- ("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
- "Please refer to the description of 'shape_run'")
- ("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
- "'--shape_prepare: set shape of tensors before compilation (before calling nnfw_prepare()).\n"
- "'--shape_run: set shape of tensors before running (before calling nnfw_run()).\n"
- "Allowed value:.\n"
- "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [] (scalar).\n"
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
- "if '--load' option is provided but '--shape_prepare' or '--shape_run' is not provided,\n"
- "'--shape_run h5' will be used by default.\n"
-#endif
- "For detailed description, please consutl the description of nnfw_set_input_tensorinfo()\n"
- )
- ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
- "Verbose level\n"
- "0: prints the only result. Messages btw run don't print\n"
- "1: prints result and message btw run\n"
- "2: prints all of messages to print\n")
- ;
- // clang-format on
-
- _options.add(general);
- _positional.add("nnpackage", 1);
-}
-
-void Args::Parse(const int argc, char **argv)
-{
- po::variables_map vm;
- po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
- vm);
-
- {
- auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
- if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
- {
- throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
- "' cannot be given at once.");
- }
- };
-
- // calling, e.g., "nnpackage_run .. -- shape_prepare .. --shape_run .." should theoretically
- // work but allowing both options together on command line makes the usage and implemenation
- // of nnpackage_run too complicated. Therefore let's not allow those option together.
- conflicting_options("shape_prepare", "shape_run");
- }
-
- if (vm.count("help"))
- {
- std::cout << "nnpackage_run\n\n";
- std::cout << "Usage: " << argv[0] << " path to nnpackage root directory [<options>]\n\n";
- std::cout << _options;
- std::cout << "\n";
-
- exit(0);
- }
-
- if (vm.count("version"))
- {
- _print_version = true;
- return;
- }
-
- try
- {
- po::notify(vm);
- }
- catch (const std::bad_cast &e)
- {
- std::cerr << "Bad cast error - " << e.what() << '\n';
- exit(1);
- }
-
- // This must be run after `notify` as `_warm_up_runs` must have been processed before.
- if (vm.count("mem_poll"))
- {
- // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP
- if (_mem_poll && _warmup_runs == 0)
- {
- _warmup_runs = 1;
- }
- }
-}
-
-bool Args::shapeParamProvided()
-{
- bool provided = false;
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- // "--shape_run h5" or "--shape_prepare h5" was provided
- provided = (getWhenToUseH5Shape() != WhenToUseH5Shape::NOT_PROVIDED);
-#endif
- // specific shape was provided
- // e.g., "--shape_run '[0, [10, 1]]'" or "--shape_prepare '[0, [10, 1]]'"
- provided |= (!getShapeMapForPrepare().empty()) || (!getShapeMapForRun().empty());
-
- return provided;
-}
-
-} // end of namespace nnpkg_run
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_ARGS_H__
-#define __NNPACKAGE_RUN_ARGS_H__
-
-#include <string>
-#include <unordered_map>
-#include <vector>
-#include <boost/program_options.hpp>
-
-#include "types.h"
-
-namespace po = boost::program_options;
-
-namespace nnpkg_run
-{
-
-using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;
-
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-enum class WhenToUseH5Shape
-{
- NOT_PROVIDED, // Param not provided
- PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
- RUN, // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
-};
-#endif
-
-class Args
-{
-public:
- Args(const int argc, char **argv);
- void print(void);
-
- const std::string &getPackageFilename(void) const { return _package_filename; }
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- const std::string &getDumpFilename(void) const { return _dump_filename; }
- const std::string &getLoadFilename(void) const { return _load_filename; }
- WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
-#endif
- const std::string &getDumpRawFilename(void) const { return _dump_raw_filename; }
- const std::string &getLoadRawFilename(void) const { return _load_raw_filename; }
- const int getNumRuns(void) const { return _num_runs; }
- const int getWarmupRuns(void) const { return _warmup_runs; }
- const int getRunDelay(void) const { return _run_delay; }
- std::unordered_map<uint32_t, uint32_t> getOutputSizes(void) const { return _output_sizes; }
- const bool getGpuMemoryPoll(void) const { return _gpumem_poll; }
- const bool getMemoryPoll(void) const { return _mem_poll; }
- const bool getWriteReport(void) const { return _write_report; }
- const bool printVersion(void) const { return _print_version; }
- TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
- TensorShapeMap &getShapeMapForRun() { return _shape_run; }
- /// @brief Return true if "--shape_run" or "--shape_prepare" is provided
- bool shapeParamProvided();
- const int getVerboseLevel(void) const { return _verbose_level; }
-
-private:
- void Initialize();
- void Parse(const int argc, char **argv);
-
-private:
- po::positional_options_description _positional;
- po::options_description _options;
-
- std::string _package_filename;
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- std::string _dump_filename;
- std::string _load_filename;
- WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::NOT_PROVIDED;
-#endif
- std::string _dump_raw_filename;
- std::string _load_raw_filename;
- TensorShapeMap _shape_prepare;
- TensorShapeMap _shape_run;
- int _num_runs;
- int _warmup_runs;
- int _run_delay;
- std::unordered_map<uint32_t, uint32_t> _output_sizes;
- bool _gpumem_poll;
- bool _mem_poll;
- bool _write_report;
- bool _print_version = false;
- int _verbose_level;
-};
-
-} // end of namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_ARGS_H__
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_FORMATTER_H__
-#define __NNPACKAGE_RUN_FORMATTER_H__
-
-#include <string>
-#include <vector>
-
-#include "types.h"
-#include "allocation.h"
-
-struct nnfw_session;
-
-namespace nnpkg_run
-{
-class Formatter
-{
-public:
- virtual ~Formatter() = default;
- Formatter(nnfw_session *sess) : session_(sess) {}
- virtual void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) = 0;
- virtual void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) = 0;
- virtual std::vector<TensorShape> readTensorShapes(const std::string &filename)
- {
- return std::vector<TensorShape>();
- };
-
-protected:
- nnfw_session *session_;
-};
-} // namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_FORMATTER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "h5formatter.h"
-#include "nnfw.h"
-#include "nnfw_util.h"
-
-#include <iostream>
-#include <stdexcept>
-#include <H5Cpp.h>
-
-namespace
-{
-nnpkg_run::TensorShape getShape(H5::DataSet &data_set)
-{
- std::vector<hsize_t> h5_shape; // hsize_t is unsigned long long
- H5::DataSpace data_space = data_set.getSpace();
- int rank = data_space.getSimpleExtentNdims();
- h5_shape.resize(rank);
-
- // read shape info from H5 file
- data_space.getSimpleExtentDims(h5_shape.data(), NULL);
-
- nnpkg_run::TensorShape shape;
- for (auto dim : h5_shape)
- shape.emplace_back(static_cast<int>(dim));
-
- return shape;
-}
-} // namespace
-
-namespace nnpkg_run
-{
-static const char *h5_value_grpname = "value";
-
-std::vector<TensorShape> H5Formatter::readTensorShapes(const std::string &filename)
-{
- uint32_t num_inputs;
- NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
- std::vector<TensorShape> tensor_shapes;
-
- try
- {
- H5::Exception::dontPrint();
-
- H5::H5File file(filename, H5F_ACC_RDONLY);
- H5::Group value_group = file.openGroup(h5_value_grpname);
-
- // Constraints: if there are n data set names, they should be unique and
- // one of [ "0", "1", .. , "n-1" ]
- for (uint32_t i = 0; i < num_inputs; ++i)
- {
- H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
- H5::DataType type = data_set.getDataType();
- auto shape = getShape(data_set);
-
- tensor_shapes.emplace_back(shape);
- }
-
- return tensor_shapes;
- }
- catch (const H5::Exception &e)
- {
- H5::Exception::printErrorStack();
- std::exit(-1);
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << std::endl;
- std::exit(-1);
- }
-}
-
-void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
-{
- uint32_t num_inputs;
- NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
- try
- {
- // Turn off the automatic error printing.
- H5::Exception::dontPrint();
-
- H5::H5File file(filename, H5F_ACC_RDONLY);
- H5::Group value_group = file.openGroup(h5_value_grpname);
- for (uint32_t i = 0; i < num_inputs; ++i)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
-
- // TODO Add Assert(nnfw shape, h5 file shape size)
-
- // allocate memory for data
- auto bufsz = bufsize_for(&ti);
- inputs[i].alloc(bufsz);
-
- H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
- H5::DataType type = data_set.getDataType();
- switch (ti.dtype)
- {
- case NNFW_TYPE_TENSOR_FLOAT32:
- if (type == H5::PredType::IEEE_F32BE || type == H5::PredType::IEEE_F32LE)
- data_set.read(inputs[i].data(), H5::PredType::NATIVE_FLOAT);
- else
- throw std::runtime_error("model input type is f32. But h5 data type is different.");
- break;
- case NNFW_TYPE_TENSOR_INT32:
- if (type == H5::PredType::STD_I32BE || type == H5::PredType::STD_I32LE)
- data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT32);
- else
- throw std::runtime_error("model input type is i32. But h5 data type is different.");
- break;
- case NNFW_TYPE_TENSOR_INT64:
- if (type == H5::PredType::STD_I64BE || type == H5::PredType::STD_I64LE)
- data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT64);
- else
- throw std::runtime_error("model input type is i64. But h5 data type is different.");
- break;
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- case NNFW_TYPE_TENSOR_BOOL:
- case NNFW_TYPE_TENSOR_UINT8:
- if (type == H5::PredType::STD_U8BE || type == H5::PredType::STD_U8LE)
- data_set.read(inputs[i].data(), H5::PredType::NATIVE_UINT8);
- else
- throw std::runtime_error(
- "model input type is qasymm8, bool or uint8. But h5 data type is different.");
- break;
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
- if (type == H5::PredType::STD_I8BE || type == H5::PredType::STD_I8LE)
- data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT8);
- else
- throw std::runtime_error("model input type is int8. But h5 data type is different.");
- break;
- case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
- throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
- default:
- throw std::runtime_error("nnpkg_run can load f32, i32, qasymm8, bool and uint8.");
- }
- NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
- NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
- }
- }
- catch (const H5::Exception &e)
- {
- H5::Exception::printErrorStack();
- std::exit(-1);
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << std::endl;
- std::exit(-1);
- }
-};
-
-void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
-{
- uint32_t num_outputs;
- NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
- try
- {
- // Turn off the automatic error printing.
- H5::Exception::dontPrint();
-
- H5::H5File file(filename, H5F_ACC_TRUNC);
- H5::Group value_group = file.createGroup(h5_value_grpname);
- for (uint32_t i = 0; i < num_outputs; i++)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
- std::vector<hsize_t> dims(ti.rank);
- for (uint32_t j = 0; j < ti.rank; ++j)
- {
- if (ti.dims[j] >= 0)
- dims[j] = static_cast<hsize_t>(ti.dims[j]);
- else
- {
- std::cerr << "Negative dimension in output tensor" << std::endl;
- exit(-1);
- }
- }
- H5::DataSpace data_space(ti.rank, dims.data());
- switch (ti.dtype)
- {
- case NNFW_TYPE_TENSOR_FLOAT32:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::IEEE_F32BE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_FLOAT);
- break;
- }
- case NNFW_TYPE_TENSOR_INT32:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_I32LE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT32);
- break;
- }
- case NNFW_TYPE_TENSOR_INT64:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_I64LE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT64);
- break;
- }
- case NNFW_TYPE_TENSOR_UINT8:
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_UINT8);
- break;
- }
- case NNFW_TYPE_TENSOR_BOOL:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
- break;
- }
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
- break;
- }
- case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
- throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
- default:
- throw std::runtime_error("nnpkg_run can dump f32, i32, qasymm8, bool and uint8.");
- }
- }
- }
- catch (const H5::Exception &e)
- {
- H5::Exception::printErrorStack();
- std::exit(-1);
- }
- catch (const std::runtime_error &e)
- {
- std::cerr << "Error during dumpOutputs on nnpackage_run : " << e.what() << std::endl;
- std::exit(-1);
- }
-};
-
-} // end of namespace nnpkg_run
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_H5FORMATTER_H__
-#define __NNPACKAGE_RUN_H5FORMATTER_H__
-
-#include "allocation.h"
-#include "formatter.h"
-#include "types.h"
-
-#include <string>
-#include <vector>
-
-struct nnfw_session;
-
-namespace nnpkg_run
-{
-class H5Formatter : public Formatter
-{
-public:
- H5Formatter(nnfw_session *sess) : Formatter(sess) {}
- std::vector<TensorShape> readTensorShapes(const std::string &filename) override;
- void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
- void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
-};
-} // namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_H5FORMATTER_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cassert>
-#include <string>
-#include "nnfw.h"
-
-namespace nnpkg_run
-{
-uint64_t num_elems(const nnfw_tensorinfo *ti)
-{
- uint64_t n = 1;
- for (uint32_t i = 0; i < ti->rank; ++i)
- {
- assert(ti->dims[i] >= 0);
- n *= ti->dims[i];
- }
- return n;
-}
-
-uint64_t bufsize_for(const nnfw_tensorinfo *ti)
-{
- static int elmsize[] = {
- sizeof(float), /* NNFW_TYPE_TENSOR_FLOAT32 */
- sizeof(int), /* NNFW_TYPE_TENSOR_INT32 */
- sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM */
- sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */
- sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
- sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
- sizeof(int8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */
- sizeof(int16_t), /* NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7 */
- };
- return elmsize[ti->dtype] * num_elems(ti);
-}
-
-} // namespace nnpkg_run
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_NNFW_UTIL_H__
-#define __NNPACKAGE_RUN_NNFW_UTIL_H__
-
-#include "nnfw.h"
-
-#define NNPR_ENSURE_STATUS(a) \
- do \
- { \
- if ((a) != NNFW_STATUS_NO_ERROR) \
- { \
- exit(-1); \
- } \
- } while (0)
-
-namespace nnpkg_run
-{
-uint64_t num_elems(const nnfw_tensorinfo *ti);
-uint64_t bufsize_for(const nnfw_tensorinfo *ti);
-} // end of namespace nnpkg_run
-
-#endif // __NNPACKAGE_UTIL_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "allocation.h"
-#include "args.h"
-#include "benchmark.h"
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-#include "h5formatter.h"
-#endif
-#include "nnfw.h"
-#include "nnfw_util.h"
-#include "nnfw_internal.h"
-#include "randomgen.h"
-#include "rawformatter.h"
-#ifdef RUY_PROFILER
-#include "ruy/profiler/profiler.h"
-#endif
-
-#include <boost/program_options.hpp>
-#include <cassert>
-#include <chrono>
-#include <cstdlib>
-#include <iostream>
-#include <libgen.h>
-#include <stdexcept>
-#include <unordered_map>
-#include <vector>
-
-static const char *default_backend_cand = "cpu";
-
-void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map,
- std::vector<nnpkg_run::TensorShape> shapes)
-{
- for (uint32_t i = 0; i < shapes.size(); i++)
- shape_map[i] = shapes[i];
-}
-
-int main(const int argc, char **argv)
-{
- using namespace nnpkg_run;
-
- try
- {
- Args args(argc, argv);
- auto nnpackage_path = args.getPackageFilename();
- if (args.printVersion())
- {
- uint32_t version;
- NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
- std::cout << "nnpkg_run (nnfw runtime: v" << (version >> 24) << "."
- << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
- exit(0);
- }
-
-#ifdef RUY_PROFILER
- ruy::profiler::ScopeProfile ruy_profile;
-#endif
-
- // TODO Apply verbose level to phases
- const int verbose = args.getVerboseLevel();
- benchmark::Phases phases(
- benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
-
- nnfw_session *session = nullptr;
- NNPR_ENSURE_STATUS(nnfw_create_session(&session));
-
- // ModelLoad
- phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
- NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, nnpackage_path.c_str()));
- });
-
- char *available_backends = std::getenv("BACKENDS");
- if (available_backends)
- NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
-
- uint32_t num_inputs;
- NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
-
- // verify input and output
-
- auto verifyInputTypes = [session]() {
- uint32_t sz;
- NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
- for (uint32_t i = 0; i < sz; ++i)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
-
- if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
- {
- std::cerr << "E: not supported input type" << std::endl;
- exit(-1);
- }
- }
- };
-
- auto verifyOutputTypes = [session]() {
- uint32_t sz;
- NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
-
- for (uint32_t i = 0; i < sz; ++i)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
-
- if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
- {
- std::cerr << "E: not supported output type" << std::endl;
- exit(-1);
- }
- }
- };
-
- auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
- for (auto tensor_shape : tensor_shape_map)
- {
- auto ind = tensor_shape.first;
- auto &shape = tensor_shape.second;
- nnfw_tensorinfo ti;
- // to fill dtype
- NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
-
- bool set_input = false;
- if (ti.rank != shape.size())
- {
- set_input = true;
- }
- else
- {
- for (int i = 0; i < ti.rank; i++)
- {
- if (ti.dims[i] != shape.at(i))
- {
- set_input = true;
- break;
- }
- }
- }
- if (!set_input)
- continue;
-
- ti.rank = shape.size();
- for (int i = 0; i < ti.rank; i++)
- ti.dims[i] = shape.at(i);
- NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
- }
- };
-
- verifyInputTypes();
- verifyOutputTypes();
-
-// set input shape before compilation
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
-
- auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) {
- assert(!h5_file.empty());
- auto shapes = H5Formatter(session).readTensorShapes(h5_file);
- overwriteShapeMap(shape_map, shapes);
- };
-
- if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
- fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare());
-#endif
- setTensorInfo(args.getShapeMapForPrepare());
-
- // prepare execution
-
- // TODO When nnfw_{prepare|run} are failed, can't catch the time
- phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
- NNPR_ENSURE_STATUS(nnfw_prepare(session));
- });
-
-// set input shape after compilation and before execution
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN ||
- (!args.getLoadFilename().empty() && !args.shapeParamProvided()))
- fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun());
-#endif
- setTensorInfo(args.getShapeMapForRun());
-
- // prepare input
- std::vector<Allocation> inputs(num_inputs);
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- if (!args.getLoadFilename().empty())
- H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
- else if (!args.getLoadRawFilename().empty())
- RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
- else
- RandomGenerator(session).generate(inputs);
-#else
- if (!args.getLoadRawFilename().empty())
- RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
- else
- RandomGenerator(session).generate(inputs);
-#endif
-
- // prepare output
- uint32_t num_outputs = 0;
- NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
- std::vector<Allocation> outputs(num_outputs);
- auto output_sizes = args.getOutputSizes();
- for (uint32_t i = 0; i < num_outputs; i++)
- {
- nnfw_tensorinfo ti;
- uint64_t output_size_in_bytes = 0;
- {
- auto found = output_sizes.find(i);
- if (found == output_sizes.end())
- {
- NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
- output_size_in_bytes = bufsize_for(&ti);
- }
- else
- {
- output_size_in_bytes = found->second;
- }
- }
- outputs[i].alloc(output_size_in_bytes);
- NNPR_ENSURE_STATUS(
- nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
- NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
- }
-
- // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
- // only warmup.
- if (verbose == 0)
- {
- phases.run(
- "WARMUP",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- args.getWarmupRuns());
- phases.run(
- "EXECUTE",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- args.getNumRuns(), true);
- }
- else
- {
- phases.run(
- "WARMUP",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getWarmupRuns());
- phases.run(
- "EXECUTE",
- [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getNumRuns(), true);
- }
-
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- // dump output tensors
- if (!args.getDumpFilename().empty())
- H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
-#endif
- if (!args.getDumpRawFilename().empty())
- RawFormatter(session).dumpOutputs(args.getDumpRawFilename(), outputs);
-
- NNPR_ENSURE_STATUS(nnfw_close_session(session));
-
- // TODO Apply verbose level to result
-
- // prepare result
- benchmark::Result result(phases);
-
- // to stdout
- benchmark::printResult(result);
-
- // to csv
- if (args.getWriteReport() == false)
- return 0;
-
- // prepare csv task
- std::string exec_basename;
- std::string nnpkg_basename;
- std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
- {
- char buf[PATH_MAX];
- char *res = realpath(nnpackage_path.c_str(), buf);
- if (res)
- {
- nnpkg_basename = basename(buf);
- }
- else
- {
- std::cerr << "E: during getting realpath from nnpackage_path." << std::endl;
- exit(-1);
- }
- exec_basename = basename(argv[0]);
- }
-
- benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
-
- return 0;
- }
- catch (boost::program_options::error &e)
- {
- std::cerr << "E: " << e.what() << std::endl;
- exit(-1);
- }
- catch (std::runtime_error &e)
- {
- std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
- exit(-1);
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "randomgen.h"
-#include "nnfw.h"
-#include "nnfw_util.h"
-#include "misc/RandomGenerator.h"
-
-#include <iostream>
-
-namespace nnpkg_run
-{
-
-template <class T> void randomData(nnfw::misc::RandomGenerator &randgen, void *data, uint64_t size)
-{
- for (uint64_t i = 0; i < size; i++)
- reinterpret_cast<T *>(data)[i] = randgen.generate<T>();
-}
-
-void RandomGenerator::generate(std::vector<Allocation> &inputs)
-{
- // generate random data
- const int seed = 1;
- nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
- for (uint32_t i = 0; i < inputs.size(); ++i)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
- auto input_size_in_bytes = bufsize_for(&ti);
- inputs[i].alloc(input_size_in_bytes);
- switch (ti.dtype)
- {
- case NNFW_TYPE_TENSOR_FLOAT32:
- randomData<float>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- case NNFW_TYPE_TENSOR_BOOL:
- randomData<bool>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- case NNFW_TYPE_TENSOR_UINT8:
- randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- case NNFW_TYPE_TENSOR_INT32:
- randomData<int32_t>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- case NNFW_TYPE_TENSOR_INT64:
- randomData<int64_t>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
- randomData<int16_t>(randgen, inputs[i].data(), num_elems(&ti));
- break;
- default:
- std::cerr << "Not supported input type" << std::endl;
- std::exit(-1);
- }
- NNPR_ENSURE_STATUS(
- nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes));
- NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
- }
-};
-
-} // end of namespace nnpkg_run
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_RANDOMGEN_H__
-#define __NNPACKAGE_RUN_RANDOMGEN_H__
-
-#include <string>
-#include <vector>
-
-#include "allocation.h"
-
-struct nnfw_session;
-
-namespace nnpkg_run
-{
-class RandomGenerator
-{
-public:
- RandomGenerator(nnfw_session *sess) : session_(sess) {}
- void generate(std::vector<Allocation> &inputs);
-
-private:
- nnfw_session *session_;
-};
-} // namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_RANDOMGEN_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "rawformatter.h"
-#include "nnfw.h"
-#include "nnfw_util.h"
-
-#include <iostream>
-#include <fstream>
-#include <stdexcept>
-
-namespace nnpkg_run
-{
-void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
-{
- uint32_t num_inputs;
- NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
-
- // Support multiple inputs
- // Option 1: Get comman-separated input file list like --load:raw a,b,c
- // Option 2: Get prefix --load:raw in
- // Internally access in.0, in.1, in.2, ... in.{N-1} where N is determined by nnfw info
- // query api.
- //
- // Currently Option 2 is implemented.
- try
- {
- for (uint32_t i = 0; i < num_inputs; ++i)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
-
- // allocate memory for data
- auto bufsz = bufsize_for(&ti);
- inputs[i].alloc(bufsz);
-
- std::ifstream file(filename + "." + std::to_string(i), std::ios::ate | std::ios::binary);
- auto filesz = file.tellg();
- if (bufsz != filesz)
- {
- throw std::runtime_error("Input " + std::to_string(i) +
- " size does not match: " + std::to_string(bufsz) +
- " expected, but " + std::to_string(filesz) + " provided.");
- }
- file.seekg(0, std::ios::beg);
- file.read(reinterpret_cast<char *>(inputs[i].data()), filesz);
- file.close();
-
- NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
- NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
- }
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << std::endl;
- std::exit(-1);
- }
-};
-
-void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
-{
- uint32_t num_outputs;
- NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
- try
- {
- for (uint32_t i = 0; i < num_outputs; i++)
- {
- nnfw_tensorinfo ti;
- NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
- auto bufsz = bufsize_for(&ti);
-
- std::ofstream file(filename + "." + std::to_string(i), std::ios::out | std::ios::binary);
- file.write(reinterpret_cast<const char *>(outputs[i].data()), bufsz);
- file.close();
- std::cerr << filename + "." + std::to_string(i) + " is generated.\n";
- }
- }
- catch (const std::runtime_error &e)
- {
- std::cerr << "Error during dumpOutputs on nnpackage_run : " << e.what() << std::endl;
- std::exit(-1);
- }
-}
-} // end of namespace nnpkg_run
+++ /dev/null
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_RAWFORMATTER_H__
-#define __NNPACKAGE_RUN_RAWFORMATTER_H__
-
-#include "allocation.h"
-#include "formatter.h"
-#include "types.h"
-
-#include <string>
-#include <vector>
-
-struct nnfw_session;
-
-namespace nnpkg_run
-{
-class RawFormatter : public Formatter
-{
-public:
- RawFormatter(nnfw_session *sess) : Formatter(sess) {}
- void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
- void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
-};
-} // namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_RAWFORMATTER_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNPACKAGE_RUN_TYPES_H__
-#define __NNPACKAGE_RUN_TYPES_H__
-
-namespace nnpkg_run
-{
-
-using TensorShape = std::vector<int>;
-
-} // end of namespace nnpkg_run
-
-#endif // __NNPACKAGE_RUN_TYPES_H__
--- /dev/null
+# onert_run is built only when both the tool itself and the onert runtime are enabled
+if(NOT BUILD_ONERT_RUN OR NOT BUILD_ONERT)
+  return()
+endif()
+
+# Sources that are always part of the tool
+list(APPEND ONERT_RUN_SRCS
+  "src/onert_run.cc"
+  "src/args.cc"
+  "src/nnfw_util.cc"
+  "src/randomgen.cc"
+  "src/rawformatter.cc"
+)
+
+nnfw_find_package(Boost REQUIRED program_options)
+nnfw_find_package(Ruy QUIET)
+nnfw_find_package(HDF5 QUIET)
+
+# The H5 formatter is compiled only when HDF5 was found
+if(HDF5_FOUND)
+  list(APPEND ONERT_RUN_SRCS "src/h5formatter.cc")
+endif()
+
+add_executable(onert_run ${ONERT_RUN_SRCS})
+
+# ONERT_HAVE_HDF5 switches on the H5 load/dump code paths in the sources
+if(NOT HDF5_FOUND)
+  message(WARNING "HDF5 NOT found. Install libhdf5-dev or set EXT_HDF5_DIR to support load/dump in onert_run.")
+else()
+  target_compile_definitions(onert_run PRIVATE ONERT_HAVE_HDF5=1)
+  target_include_directories(onert_run PRIVATE ${HDF5_INCLUDE_DIRS})
+  target_link_libraries(onert_run ${HDF5_CXX_LIBRARIES})
+endif()
+
+target_include_directories(onert_run PRIVATE src ${Boost_INCLUDE_DIRS})
+
+# Link order is kept as: tflite helpers, runtime API, Boost, benchmark helpers
+target_link_libraries(onert_run
+  nnfw_lib_tflite
+  jsoncpp
+  nnfw-dev
+  ${Boost_PROGRAM_OPTIONS_LIBRARY}
+  nnfw_lib_benchmark
+)
+
+# Ruy profiling support is linked only when requested and available
+if(Ruy_FOUND AND PROFILE_RUY)
+  target_link_libraries(onert_run ruy_instrumentation ruy_profiler)
+endif()
+
+install(TARGETS onert_run DESTINATION bin)
--- /dev/null
+# onert_run
+
+`onert_run` is a tool to run `nnpackage`.
+
+It takes `nnpackage` as input. It uses **runtime API** internally.
+
+## Usage
+
+### Simple run
+
+This runs the given nnpackage with randomly generated input data:
+
+```
+$ ./onert_run path_to_nnpackage_directory
+```
+
+Output would look like:
+
+```
+nnfw_prepare takes 425.235 ms
+nnfw_run takes 2.525 ms
+```
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_ALLOCATION_H__
+#define __ONERT_RUN_ALLOCATION_H__
+
+#include <cstdlib>
+#include <cstdint>
+
+namespace onert_run
+{
+// Owns a single malloc()-ed buffer and releases it on destruction.
+//
+// NOTE(review): the class is still implicitly copyable; copying an instance that
+// already holds a buffer would make both copies free() the same pointer.
+// Confirm that callers never copy a populated Allocation.
+class Allocation
+{
+public:
+  Allocation() : data_(nullptr) {}
+  ~Allocation() { free(data_); }
+
+  // Returns the current buffer, or nullptr when nothing has been allocated yet.
+  void *data() const { return data_; }
+
+  // Allocates a fresh buffer of sz bytes and returns it (nullptr if malloc fails).
+  // Any buffer from an earlier call is released first so repeated calls do not leak.
+  void *alloc(uint64_t sz)
+  {
+    free(data_); // free(nullptr) is a no-op, so the first call is safe as well
+    return data_ = malloc(sz);
+  }
+
+private:
+  void *data_;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_ALLOCATION_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <cstdlib>
+#include <functional>
+#include <iostream>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <json/json.h>
+
+namespace
+{
+
+// Converts a flat, even-sized JSON array of alternating keys and values into a map.
+// For example,
+//   [0, [1, 2, 3, 4], 3, 40, 4, []] in JSON
+// is converted to:
+//   {
+//     0 -> [1, 2, 3, 4]
+//     3 -> 40
+//     4 -> []
+//   } in std::unordered_map. Note that the value type is still Json::Value.
+//
+// Keys (even indices) must be unsigned integers. When a key appears more than once,
+// the last value wins. The process exits with status 1 on malformed input.
+std::unordered_map<uint32_t, Json::Value> argArrayToMap(const Json::Value &jsonval)
+{
+  if (!jsonval.isArray() || (jsonval.size() % 2 != 0))
+  {
+    std::cerr << "JSON argument must be an even-sized array in JSON\n";
+    exit(1);
+  }
+
+  std::unordered_map<uint32_t, Json::Value> ret;
+  for (uint32_t i = 0; i < jsonval.size(); i += 2)
+  {
+    if (!jsonval[i].isUInt())
+    {
+      std::cerr << "Key values(values in even indices) must be unsigned integers\n";
+      exit(1);
+    }
+    const uint32_t key = jsonval[i].asUInt();
+    ret[key] = jsonval[i + 1];
+  }
+  return ret;
+}
+
+// Parses a shape option string and stores the decoded shapes into shape_map.
+//
+// shape_str is a JSON array of alternating tensor indices and dimension lists,
+// e.g., "[1, [2, 3], 3, []]" sets tensor 1 to [2, 3] and tensor 3 to [] (scalar).
+// The special value "h5" is not interpreted here; it fails JSON parsing like any
+// other non-JSON text.
+//
+// Every dimension must be a non-negative integer that fits in int, because shapes
+// are stored as std::vector<int>. The process exits with status 1 on invalid input.
+void handleShapeJsonParam(onert_run::TensorShapeMap &shape_map, const std::string &shape_str)
+{
+  Json::Value root;
+  Json::Reader reader;
+  if (!reader.parse(shape_str, root, false))
+  {
+    std::cerr << "Invalid JSON format for shape \"" << shape_str << "\"\n";
+    exit(1);
+  }
+
+  auto arg_map = argArrayToMap(root);
+  for (auto &pair : arg_map)
+  {
+    uint32_t key = pair.first;
+    Json::Value &shape_json = pair.second;
+    if (!shape_json.isArray())
+    {
+      std::cerr << "All the values must be list: " << shape_str << "\n";
+      exit(1);
+    }
+
+    std::vector<int> shape;
+    for (auto &dim_json : shape_json)
+    {
+      if (!dim_json.isUInt())
+      {
+        std::cerr << "All the dims should be dim >= 0: " << shape_str << "\n";
+        exit(1);
+      }
+      // isUInt() admits values above INT_MAX, which would wrap to a negative int below
+      if (!dim_json.isInt())
+      {
+        std::cerr << "Dim is too large: " << shape_str << "\n";
+        exit(1);
+      }
+
+      shape.emplace_back(dim_json.asInt());
+    }
+
+    shape_map[key] = shape;
+  }
+}
+
+// Exits with status 1 unless model_filename is non-empty and names an existing path.
+void checkModelfile(const std::string &model_filename)
+{
+  // Guard: the option must carry a file name at all
+  if (model_filename.empty())
+  {
+    // TODO Print usage instead of the below message
+    std::cerr << "Please specify model file. Run with `--help` for usage."
+              << "\n";
+
+    exit(1);
+  }
+
+  // F_OK only tests for existence, not for read permission
+  const bool found = (access(model_filename.c_str(), F_OK) != -1);
+  if (!found)
+  {
+    std::cerr << "Model file not found: " << model_filename << "\n";
+    exit(1);
+  }
+}
+
+// Exits with status 1 unless package_filename is non-empty and names an existing path.
+void checkPackage(const std::string &package_filename)
+{
+  // Guard: the option must carry a package name at all
+  if (package_filename.empty())
+  {
+    // TODO Print usage instead of the below message
+    std::cerr << "Please specify nnpackage file. Run with `--help` for usage."
+              << "\n";
+
+    exit(1);
+  }
+
+  // F_OK only tests for existence, not for read permission
+  const bool found = (access(package_filename.c_str(), F_OK) != -1);
+  if (!found)
+  {
+    std::cerr << "nnpackage not found: " << package_filename << "\n";
+    exit(1);
+  }
+}
+
+} // namespace
+
+namespace onert_run
+{
+
+// Builds the option table first, then parses argv against it.
+// Initialize() must run before Parse() because Parse() reads _options and _positional.
+Args::Args(const int argc, char **argv)
+{
+ Initialize();
+ Parse(argc, argv);
+}
+
+// Registers every command line option together with the notifier that stores its value.
+//
+// Notifiers only run when po::notify() is called from Parse(). Options declared with
+// default_value() also pass that default to their notifier.
+void Args::Initialize(void)
+{
+  // --nnpackage: remember the package path and verify that it exists
+  auto process_nnpackage = [&](const std::string &package_filename) {
+    _package_filename = package_filename;
+
+    std::cerr << "Package Filename " << _package_filename << std::endl;
+    checkPackage(package_filename);
+  };
+
+  // --modelfile: remember the model path, verify it and switch to single-model mode
+  auto process_modelfile = [&](const std::string &model_filename) {
+    _model_filename = model_filename;
+
+    std::cerr << "Model Filename " << _model_filename << std::endl;
+    checkModelfile(model_filename);
+
+    _use_single_model = true;
+  };
+
+  // --path (also the positional argument): a directory is treated as an nnpackage,
+  // anything else as a single model file
+  auto process_path = [&](const std::string &path) {
+    struct stat sb;
+    if (stat(path.c_str(), &sb) == 0)
+    {
+      // S_ISDIR compares the whole file-type field. Testing the S_IFDIR bit alone
+      // would also match block devices and sockets, whose type codes share that bit.
+      if (S_ISDIR(sb.st_mode))
+      {
+        _package_filename = path;
+        checkPackage(path);
+        std::cerr << "Package Filename " << path << std::endl;
+      }
+      else
+      {
+        _model_filename = path;
+        checkModelfile(path);
+        std::cerr << "Model Filename " << path << std::endl;
+        _use_single_model = true;
+      }
+    }
+    else
+    {
+      std::cerr << "Cannot find: " << path << "\n";
+      exit(1);
+    }
+  };
+
+  // --output_sizes: JSON array of alternating output index and buffer size
+  auto process_output_sizes = [&](const std::string &output_sizes_json_str) {
+    Json::Value root;
+    Json::Reader reader;
+    if (!reader.parse(output_sizes_json_str, root, false))
+    {
+      std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n";
+      exit(1);
+    }
+
+    auto arg_map = argArrayToMap(root);
+    for (auto &pair : arg_map)
+    {
+      uint32_t key = pair.first;
+      Json::Value &val_json = pair.second;
+      if (!val_json.isUInt())
+      {
+        std::cerr << "All the values in `output_sizes` must be unsigned integers\n";
+        exit(1);
+      }
+      uint32_t val = val_json.asUInt();
+      _output_sizes[key] = val;
+    }
+  };
+
+  // --shape_prepare: either "h5" (HDF5 builds only) or a JSON shape list
+  auto process_shape_prepare = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (shape_str == "H5" || shape_str == "h5")
+    {
+      _when_to_use_h5_shape = WhenToUseH5Shape::PREPARE;
+      return;
+    }
+#endif
+    try
+    {
+      handleShapeJsonParam(_shape_prepare, shape_str);
+    }
+    catch (const std::exception &)
+    {
+      std::cerr << "error with '--shape_prepare' option: " << shape_str << std::endl;
+      exit(1);
+    }
+  };
+
+  // --shape_run: either "h5" (HDF5 builds only) or a JSON shape list
+  auto process_shape_run = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (shape_str == "H5" || shape_str == "h5")
+    {
+      _when_to_use_h5_shape = WhenToUseH5Shape::RUN;
+      return;
+    }
+#endif
+    try
+    {
+      handleShapeJsonParam(_shape_run, shape_str);
+    }
+    catch (const std::exception &)
+    {
+      std::cerr << "error with '--shape_run' option: " << shape_str << std::endl;
+      exit(1);
+    }
+  };
+
+  // General options
+  po::options_description general("General options", 100);
+
+  // clang-format off
+  general.add_options()
+    ("help,h", "Print available options")
+    ("version", "Print version and exit immediately")
+    ("nnpackage", po::value<std::string>()->notifier(process_nnpackage), "NN Package file(directory) name")
+    ("modelfile", po::value<std::string>()->notifier(process_modelfile), "NN Model filename")
+    ("path", po::value<std::string>()->notifier(process_path), "NN Package or NN Modelfile path")
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
+    ("load,l", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_filename = v; }), "Input filename")
+#endif
+    ("dump:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_raw_filename = v; }), "Raw Output filename")
+    ("load:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_raw_filename = v; }), "Raw Input filename")
+    ("output_sizes", po::value<std::string>()->notifier(process_output_sizes),
+        "The output buffer size in JSON 1D array\n"
+        "If not given, the model's output sizes are used\n"
+        "e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n")
+    ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
+    ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
+    ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(us) between runs (as default no delay)")
+    ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
+    ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling")
+    ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }),
+        "Write report\n"
+        "{exec}-{nnpkg|modelfile}-{backend}.csv will be generated.\n"
+        "e.g. onert_run-UNIT_Add_000-acl_cl.csv.\n"
+        "{nnpkg|modelfile} name may be changed to realpath if you use symbolic-link.")
+    ("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
+        "Please refer to the description of 'shape_run'")
+    ("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
+        "'--shape_prepare: set shape of tensors before compilation (before calling nnfw_prepare()).\n"
+        "'--shape_run: set shape of tensors before running (before calling nnfw_run()).\n"
+        "Allowed value:.\n"
+        "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [] (scalar).\n"
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+        "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
+        "if '--load' option is provided but '--shape_prepare' or '--shape_run' is not provided,\n"
+        "'--shape_run h5' will be used by default.\n"
+#endif
+        "For detailed description, please consult the description of nnfw_set_input_tensorinfo()\n"
+        )
+    ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
+        "Verbose level\n"
+        "0: prints the only result. Messages btw run don't print\n"
+        "1: prints result and message btw run\n"
+        "2: prints all of messages to print\n")
+    ;
+  // clang-format on
+
+  _options.add(general);
+  // Positional arguments are mapped to "path"; -1 places no limit on their number
+  _positional.add("path", -1);
+}
+
+// Parses argv with the options registered in Initialize().
+//
+// "--help" prints the usage and exits with status 0. "--version" only sets
+// _print_version and returns before po::notify() is reached, so no notifier runs in
+// that case. Otherwise the option combination is validated, po::notify() invokes the
+// notifiers, and the warmup count is adjusted for memory polling.
+// Throws boost::program_options::error for conflicting or missing options.
+void Args::Parse(const int argc, char **argv)
+{
+ po::variables_map vm;
+ po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
+ vm);
+
+ if (vm.count("help"))
+ {
+ std::cout << "onert_run\n\n";
+ std::cout << "Usage: " << argv[0] << " path to nnpackage root directory [<options>]\n\n";
+ std::cout << _options;
+ std::cout << "\n";
+
+ exit(0);
+ }
+
+ if (vm.count("version"))
+ {
+ _print_version = true;
+ return;
+ }
+
+ {
+ // Rejects the command line when both options were explicitly given (not defaulted)
+ auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
+ if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
+ {
+ throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
+ "' cannot be given at once.");
+ }
+ };
+
+ // calling, e.g., "onert_run .. --shape_prepare .. --shape_run .." should theoretically
+ // work but allowing both options together on command line makes the usage and implementation
+ // of onert_run too complicated. Therefore let's not allow those option together.
+ conflicting_options("shape_prepare", "shape_run");
+
+ // Cannot use both single model file and nnpackage at once
+ conflicting_options("modelfile", "nnpackage");
+
+ // Require modelfile, nnpackage, or path
+ if (!vm.count("modelfile") && !vm.count("nnpackage") && !vm.count("path"))
+ throw boost::program_options::error(
+ std::string("Require one of options modelfile, nnpackage, or path."));
+ }
+
+ try
+ {
+ po::notify(vm);
+ }
+ catch (const std::bad_cast &e)
+ {
+ std::cerr << "Bad cast error - " << e.what() << '\n';
+ exit(1);
+ }
+
+ // This must be run after `notify` as `_warmup_runs` must have been processed before.
+ if (vm.count("mem_poll"))
+ {
+ // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP
+ if (_mem_poll && _warmup_runs == 0)
+ {
+ _warmup_runs = 1;
+ }
+ }
+}
+
+// Reports whether any shape override was requested on the command line.
+bool Args::shapeParamProvided()
+{
+  // A concrete shape was given,
+  // e.g., "--shape_run '[0, [10, 1]]'" or "--shape_prepare '[0, [10, 1]]'"
+  const bool explicit_shape = !getShapeMapForPrepare().empty() || !getShapeMapForRun().empty();
+
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+  // "--shape_run h5" or "--shape_prepare h5" was given
+  const bool h5_shape = (getWhenToUseH5Shape() != WhenToUseH5Shape::NOT_PROVIDED);
+  return h5_shape || explicit_shape;
+#else
+  return explicit_shape;
+#endif
+}
+
+} // end of namespace onert_run
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_ARGS_H__
+#define __ONERT_RUN_ARGS_H__
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include <boost/program_options.hpp>
+
+#include "types.h"
+
+namespace po = boost::program_options;
+
+namespace onert_run
+{
+
+// Maps a tensor index to the shape requested for it ("--shape_prepare" / "--shape_run").
+using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;
+
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+// Selects at which point shapes read from the H5 input file are applied.
+// Only available when the tool is built with HDF5 support.
+enum class WhenToUseH5Shape
+{
+ NOT_PROVIDED, // Param not provided
+ PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
+ RUN, // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
+};
+#endif
+
+// Command line arguments of onert_run, parsed with boost::program_options.
+//
+// The constructor registers the options and parses argv. The getters then expose the
+// parsed values. Scalar members carry in-class initializers equal to the option
+// defaults, so they hold defined values even when parsing returns early
+// (e.g. for "--version") before the option notifiers have run.
+class Args
+{
+public:
+  Args(const int argc, char **argv);
+  void print(void);
+
+  const std::string &getPackageFilename(void) const { return _package_filename; }
+  const std::string &getModelFilename(void) const { return _model_filename; }
+  bool useSingleModel(void) const { return _use_single_model; }
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+  const std::string &getDumpFilename(void) const { return _dump_filename; }
+  const std::string &getLoadFilename(void) const { return _load_filename; }
+  WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
+#endif
+  const std::string &getDumpRawFilename(void) const { return _dump_raw_filename; }
+  const std::string &getLoadRawFilename(void) const { return _load_raw_filename; }
+  int getNumRuns(void) const { return _num_runs; }
+  int getWarmupRuns(void) const { return _warmup_runs; }
+  int getRunDelay(void) const { return _run_delay; }
+  std::unordered_map<uint32_t, uint32_t> getOutputSizes(void) const { return _output_sizes; }
+  bool getGpuMemoryPoll(void) const { return _gpumem_poll; }
+  bool getMemoryPoll(void) const { return _mem_poll; }
+  bool getWriteReport(void) const { return _write_report; }
+  bool printVersion(void) const { return _print_version; }
+  TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
+  TensorShapeMap &getShapeMapForRun() { return _shape_run; }
+  /// @brief Return true if "--shape_run" or "--shape_prepare" is provided
+  bool shapeParamProvided();
+  int getVerboseLevel(void) const { return _verbose_level; }
+
+private:
+  void Initialize();
+  void Parse(const int argc, char **argv);
+
+private:
+  po::positional_options_description _positional;
+  po::options_description _options;
+
+  std::string _package_filename;
+  std::string _model_filename;
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+  std::string _dump_filename;
+  std::string _load_filename;
+  WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::NOT_PROVIDED;
+#endif
+  std::string _dump_raw_filename;
+  std::string _load_raw_filename;
+  TensorShapeMap _shape_prepare;
+  TensorShapeMap _shape_run;
+  // The initializers below mirror the default_value() of the matching options
+  int _num_runs = 1;
+  int _warmup_runs = 0;
+  int _run_delay = -1;
+  std::unordered_map<uint32_t, uint32_t> _output_sizes;
+  bool _gpumem_poll = false;
+  bool _mem_poll = false;
+  bool _write_report = false;
+  bool _print_version = false;
+  int _verbose_level = 0;
+  bool _use_single_model = false;
+};
+
+} // end of namespace onert_run
+
+#endif // __ONERT_RUN_ARGS_H__
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_FORMATTER_H__
+#define __ONERT_RUN_FORMATTER_H__
+
+#include <string>
+#include <vector>
+
+#include "types.h"
+#include "allocation.h"
+
+struct nnfw_session;
+
+namespace onert_run
+{
+// Interface for loading input tensors from files and dumping output tensors to files.
+// Implementations use the nnfw session passed to the constructor; the session is not
+// owned by the formatter.
+class Formatter
+{
+public:
+  virtual ~Formatter() = default;
+  Formatter(nnfw_session *sess) : session_(sess) {}
+
+  // Fills `inputs` from the data stored under `filename`.
+  virtual void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) = 0;
+
+  // Writes the contents of `outputs` under `filename`.
+  virtual void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) = 0;
+
+  // Returns the tensor shapes stored in the file. The default implementation ignores
+  // the file name and returns an empty list.
+  virtual std::vector<TensorShape> readTensorShapes(const std::string & /*filename*/)
+  {
+    return std::vector<TensorShape>();
+  }
+
+protected:
+  nnfw_session *session_;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_FORMATTER_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "h5formatter.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+
+#include <iostream>
+#include <stdexcept>
+#include <H5Cpp.h>
+
+namespace
+{
+// Reads the extent of every dimension of data_set and returns them as a TensorShape.
+onert_run::TensorShape getShape(H5::DataSet &data_set)
+{
+  // Query the rank first so the extent buffer can be sized before reading
+  H5::DataSpace space = data_set.getSpace();
+  const int ndims = space.getSimpleExtentNdims();
+  std::vector<hsize_t> extents; // hsize_t is unsigned long long
+  extents.resize(ndims);
+
+  // read shape info from H5 file
+  space.getSimpleExtentDims(extents.data(), NULL);
+
+  // TensorShape stores int, so each extent is narrowed explicitly
+  onert_run::TensorShape result;
+  for (size_t axis = 0; axis < extents.size(); ++axis)
+  {
+    result.emplace_back(static_cast<int>(extents[axis]));
+  }
+
+  return result;
+}
+} // namespace
+
+namespace onert_run
+{
+static const char *h5_value_grpname = "value";
+
+// Reads the shape of every model input from the "value" group of the given H5 file.
+//
+// The number of inputs is queried from the session; data set "i" of the group supplies
+// the shape of input i. Any HDF5 or standard exception terminates the process with
+// exit status -1 after the error is printed.
+std::vector<TensorShape> H5Formatter::readTensorShapes(const std::string &filename)
+{
+  uint32_t num_inputs;
+  NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+  std::vector<TensorShape> tensor_shapes;
+  tensor_shapes.reserve(num_inputs);
+
+  try
+  {
+    // Turn off the automatic error printing; errors are reported in the handlers below.
+    H5::Exception::dontPrint();
+
+    H5::H5File file(filename, H5F_ACC_RDONLY);
+    H5::Group value_group = file.openGroup(h5_value_grpname);
+
+    // Constraints: if there are n data set names, they should be unique and
+    //              one of [ "0", "1", .. , "n-1" ]
+    for (uint32_t i = 0; i < num_inputs; ++i)
+    {
+      H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
+      tensor_shapes.emplace_back(getShape(data_set));
+    }
+
+    return tensor_shapes;
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    std::exit(-1);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << e.what() << std::endl;
+    std::exit(-1);
+  }
+}
+
+// Loads every model input from the "value" group of the given H5 file and registers
+// the buffers with the session.
+//
+// For input i, a buffer sized from the session's tensor info is allocated in inputs[i],
+// data set "i" is read into it, and the buffer is passed to nnfw_set_input() with the
+// channels-last layout. The stored H5 type must match the model input type. A data set
+// holding more elements than the model input is rejected, because reading it would
+// write past the end of the buffer. Any HDF5 or standard exception terminates the
+// process with exit status -1 after the error is printed.
+void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
+{
+  uint32_t num_inputs;
+  NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+  try
+  {
+    // Turn off the automatic error printing.
+    H5::Exception::dontPrint();
+
+    H5::H5File file(filename, H5F_ACC_RDONLY);
+    H5::Group value_group = file.openGroup(h5_value_grpname);
+    for (uint32_t i = 0; i < num_inputs; ++i)
+    {
+      nnfw_tensorinfo ti;
+      NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+      // TODO Add Assert(nnfw shape, h5 file shape size)
+
+      // allocate memory for data
+      auto bufsz = bufsize_for(&ti);
+      inputs[i].alloc(bufsz);
+
+      H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
+
+      // read() copies the whole data set, so it must not hold more elements than the
+      // buffer was sized for
+      const auto h5_elems = static_cast<uint64_t>(data_set.getSpace().getSimpleExtentNpoints());
+      if (h5_elems > num_elems(&ti))
+        throw std::runtime_error("h5 data of input " + std::to_string(i) +
+                                 " has more elements than the model input.");
+
+      H5::DataType type = data_set.getDataType();
+      switch (ti.dtype)
+      {
+        case NNFW_TYPE_TENSOR_FLOAT32:
+          if (type == H5::PredType::IEEE_F32BE || type == H5::PredType::IEEE_F32LE)
+            data_set.read(inputs[i].data(), H5::PredType::NATIVE_FLOAT);
+          else
+            throw std::runtime_error("model input type is f32. But h5 data type is different.");
+          break;
+        case NNFW_TYPE_TENSOR_INT32:
+          if (type == H5::PredType::STD_I32BE || type == H5::PredType::STD_I32LE)
+            data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT32);
+          else
+            throw std::runtime_error("model input type is i32. But h5 data type is different.");
+          break;
+        case NNFW_TYPE_TENSOR_INT64:
+          if (type == H5::PredType::STD_I64BE || type == H5::PredType::STD_I64LE)
+            data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT64);
+          else
+            throw std::runtime_error("model input type is i64. But h5 data type is different.");
+          break;
+        case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+        case NNFW_TYPE_TENSOR_BOOL:
+        case NNFW_TYPE_TENSOR_UINT8:
+          if (type == H5::PredType::STD_U8BE || type == H5::PredType::STD_U8LE)
+            data_set.read(inputs[i].data(), H5::PredType::NATIVE_UINT8);
+          else
+            throw std::runtime_error(
+              "model input type is qasymm8, bool or uint8. But h5 data type is different.");
+          break;
+        case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+          if (type == H5::PredType::STD_I8BE || type == H5::PredType::STD_I8LE)
+            data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT8);
+          else
+            throw std::runtime_error("model input type is int8. But h5 data type is different.");
+          break;
+        case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+          throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
+        default:
+          // Lists every type handled by the cases above
+          throw std::runtime_error(
+            "onert_run can load f32, i32, i64, qasymm8, qasymm8_signed, bool and uint8.");
+      }
+      NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
+      NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+    }
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    std::exit(-1);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << e.what() << std::endl;
+    std::exit(-1);
+  }
+}
+
+/**
+ * @brief Write every output tensor of the session into a newly created HDF5 file
+ *
+ * The file is opened with H5F_ACC_TRUNC. Output i is stored as the dataset named "<i>"
+ * inside the group h5_value_grpname, with the dimensions reported by
+ * nnfw_output_tensorinfo().
+ *
+ * @param filename  Path of the HDF5 file to create
+ * @param outputs   Output buffers indexed by output number; must hold at least as many
+ *                  entries as the session has outputs
+ *
+ * @note Every failure (HDF5 error, unsupported dtype, negative dimension, nnfw API error)
+ *       terminates the process with exit code -1.
+ */
+void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
+{
+  uint32_t num_outputs;
+  NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
+  try
+  {
+    // Indexing outputs[i] below must never run past the caller's vector
+    if (outputs.size() < num_outputs)
+      throw std::runtime_error("number of output buffers is less than number of model outputs.");
+
+    // Turn off the automatic error printing.
+    H5::Exception::dontPrint();
+
+    H5::H5File file(filename, H5F_ACC_TRUNC);
+    H5::Group value_group = file.createGroup(h5_value_grpname);
+    for (uint32_t i = 0; i < num_outputs; i++)
+    {
+      nnfw_tensorinfo ti;
+      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
+      std::vector<hsize_t> dims(ti.rank);
+      for (uint32_t j = 0; j < ti.rank; ++j)
+      {
+        if (ti.dims[j] >= 0)
+          dims[j] = static_cast<hsize_t>(ti.dims[j]);
+        else
+        {
+          std::cerr << "Negative dimension in output tensor" << std::endl;
+          exit(-1);
+        }
+      }
+      H5::DataSpace data_space(ti.rank, dims.data());
+
+      // Create dataset "<i>" with the given on-disk type and fill it from outputs[i],
+      // whose in-memory layout is described by mem_type.
+      auto write_dataset = [&](const H5::DataType &file_type, const H5::DataType &mem_type) {
+        H5::DataSet data_set = value_group.createDataSet(std::to_string(i), file_type, data_space);
+        data_set.write(outputs[i].data(), mem_type);
+      };
+
+      switch (ti.dtype)
+      {
+        case NNFW_TYPE_TENSOR_FLOAT32:
+          write_dataset(H5::PredType::IEEE_F32BE, H5::PredType::NATIVE_FLOAT);
+          break;
+        case NNFW_TYPE_TENSOR_INT32:
+          write_dataset(H5::PredType::STD_I32LE, H5::PredType::NATIVE_INT32);
+          break;
+        case NNFW_TYPE_TENSOR_INT64:
+          write_dataset(H5::PredType::STD_I64LE, H5::PredType::NATIVE_INT64);
+          break;
+        case NNFW_TYPE_TENSOR_UINT8:
+        case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+          write_dataset(H5::PredType::STD_U8BE, H5::PredType::NATIVE_UINT8);
+          break;
+        case NNFW_TYPE_TENSOR_BOOL:
+          // The dataset is unsigned, so describe the buffer as unsigned as well; a signed
+          // memory type would make HDF5 treat bytes >= 0x80 as negative values.
+          write_dataset(H5::PredType::STD_U8LE, H5::PredType::NATIVE_UINT8);
+          break;
+        case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+          write_dataset(H5::PredType::STD_I8LE, H5::PredType::NATIVE_INT8);
+          break;
+        case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+          throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
+        default:
+          throw std::runtime_error("onert_run can dump f32, i32, qasymm8, bool and uint8.");
+      }
+    }
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    std::exit(-1);
+  }
+  catch (const std::exception &e)
+  {
+    // Catch std::exception (not only runtime_error) so that no standard exception escapes,
+    // matching the handler used by loadInputs.
+    std::cerr << "Error during dumpOutputs on onert_run : " << e.what() << std::endl;
+    std::exit(-1);
+  }
+}
+
+} // end of namespace onert_run
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_H5FORMATTER_H__
+#define __ONERT_RUN_H5FORMATTER_H__
+
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
+#include <string>
+#include <vector>
+
+struct nnfw_session;
+
+namespace onert_run
+{
+// Formatter that exchanges tensor data with HDF5 files.
+class H5Formatter : public Formatter
+{
+public:
+ // Forwards the session to the Formatter base class.
+ H5Formatter(nnfw_session *sess) : Formatter(sess) {}
+ // Presumably returns one shape per input stored in the HDF5 file `filename`
+ // (the implementation is not shown next to this declaration - confirm).
+ std::vector<TensorShape> readTensorShapes(const std::string &filename) override;
+ // Reads the dataset named after each input index into inputs[i] and registers the buffer
+ // with the session through nnfw_set_input().
+ void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+ // Creates the HDF5 file `filename` and writes outputs[i] as the dataset named "<i>".
+ void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_H5FORMATTER_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cassert>
+#include <cstdint>
+#include <stdexcept>
+#include <string>
+#include "nnfw.h"
+
+namespace onert_run
+{
+/**
+ * @brief Count the elements of a tensor
+ * @param ti  Tensor info whose first `rank` entries of `dims` are multiplied together
+ * @return Product of the dimensions; 1 when the rank is 0
+ */
+uint64_t num_elems(const nnfw_tensorinfo *ti)
+{
+  uint64_t count = 1;
+  uint32_t axis = 0;
+  while (axis < ti->rank)
+  {
+    // Negative dimensions are rejected in builds where assert is active
+    assert(ti->dims[axis] >= 0);
+    count *= ti->dims[axis];
+    ++axis;
+  }
+  return count;
+}
+
+/**
+ * @brief Compute the buffer size in bytes needed to hold a tensor
+ * @param ti  Tensor info providing the element type and the dimensions
+ * @return Element size of `ti->dtype` multiplied by num_elems(ti)
+ * @throw std::runtime_error when `ti->dtype` has no entry in the element size table
+ */
+uint64_t bufsize_for(const nnfw_tensorinfo *ti)
+{
+  // Element size in bytes, indexed by the numeric value of the NNFW_TYPE enumerator
+  static const uint64_t elmsize[] = {
+    sizeof(float),   /* NNFW_TYPE_TENSOR_FLOAT32 = 0 */
+    sizeof(int32_t), /* NNFW_TYPE_TENSOR_INT32 = 1 */
+    sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM = 2 */
+    sizeof(bool),    /* NNFW_TYPE_TENSOR_BOOL = 3 */
+    sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
+    sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
+    sizeof(int8_t),  /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */
+    sizeof(int16_t), /* NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7 */
+  };
+  const uint64_t num_types = sizeof(elmsize) / sizeof(elmsize[0]);
+
+  // A negative enumerator becomes a huge unsigned value and is rejected by the same check
+  const uint64_t type_index = static_cast<uint64_t>(ti->dtype);
+  if (type_index >= num_types)
+    throw std::runtime_error("Unsupported tensor data type");
+
+  return elmsize[type_index] * num_elems(ti);
+}
+
+} // namespace onert_run
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_NNFW_UTIL_H__
+#define __ONERT_RUN_NNFW_UTIL_H__
+
+#include "nnfw.h"
+
+// Evaluates the nnfw API expression `a` exactly once and terminates the process with
+// exit code -1 unless it yields NNFW_STATUS_NO_ERROR. The do/while(0) wrapper makes the
+// macro usable as a single statement, e.g. as the body of an unbraced if/else.
+#define NNPR_ENSURE_STATUS(a)        \
+  do                                 \
+  {                                  \
+    if (NNFW_STATUS_NO_ERROR != (a)) \
+      exit(-1);                      \
+  } while (0)
+
+namespace onert_run
+{
+// Returns the number of elements of the tensor described by `ti`: the product of its first
+// `rank` dimensions (1 for rank 0).
+uint64_t num_elems(const nnfw_tensorinfo *ti);
+// Returns the size in bytes of the tensor described by `ti`: the element size of its dtype
+// multiplied by num_elems(ti).
+uint64_t bufsize_for(const nnfw_tensorinfo *ti);
+} // end of namespace onert_run
+
+#endif // __ONERT_RUN_NNFW_UTIL_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "allocation.h"
+#include "args.h"
+#include "benchmark.h"
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+#include "h5formatter.h"
+#endif
+#include "nnfw.h"
+#include "nnfw_util.h"
+#include "nnfw_internal.h"
+#include "randomgen.h"
+#include "rawformatter.h"
+#ifdef RUY_PROFILER
+#include "ruy/profiler/profiler.h"
+#endif
+
+#include <boost/program_options.hpp>
+#include <cassert>
+#include <chrono>
+#include <cstdlib>
+#include <iostream>
+#include <libgen.h>
+#include <stdexcept>
+#include <unordered_map>
+#include <vector>
+
+static const char *default_backend_cand = "cpu";
+
+// Stores shapes[k] under key k of shape_map for every k in [0, shapes.size()), replacing
+// whatever was stored under those keys before.
+void overwriteShapeMap(onert_run::TensorShapeMap &shape_map,
+                       std::vector<onert_run::TensorShape> shapes)
+{
+  uint32_t input_index = 0;
+  for (const auto &shape : shapes)
+  {
+    shape_map[input_index] = shape;
+    ++input_index;
+  }
+}
+
+/**
+ * @brief Entry point of onert_run: load a model, feed inputs, run it and report timings
+ *
+ * Steps, in order:
+ *  1. Parse the command line (or print the runtime version and exit).
+ *  2. Create a session and load the model file or the package (MODEL_LOAD phase).
+ *  3. Pass the BACKENDS environment variable to the session when it is set.
+ *  4. Check the dtype of every input/output, apply the shapes requested for the prepare
+ *     step and call nnfw_prepare() (PREPARE phase).
+ *  5. Apply the shapes requested for the run step, fill the inputs (HDF5 file, raw files
+ *     or random data) and allocate the output buffers.
+ *  6. Run the WARMUP and EXECUTE phases; per-run timings are printed when the verbose
+ *     level is not 0.
+ *  7. Optionally dump the outputs, close the session, print the benchmark result and
+ *     optionally write the report.
+ *
+ * @return 0 on success. Failures terminate the process with exit code -1.
+ */
+int main(const int argc, char **argv)
+{
+  using namespace onert_run;
+
+  try
+  {
+    Args args(argc, argv);
+    if (args.printVersion())
+    {
+      uint32_t version;
+      NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version));
+      // NOTE(review): the middle field is taken from bits 8-15 only; confirm this matches
+      // the version layout documented for NNFW_INFO_ID_VERSION.
+      std::cout << "onert_run (nnfw runtime: v" << (version >> 24) << "."
+                << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl;
+      exit(0);
+    }
+
+#ifdef RUY_PROFILER
+    ruy::profiler::ScopeProfile ruy_profile;
+#endif
+
+    // TODO Apply verbose level to phases
+    const int verbose = args.getVerboseLevel();
+    benchmark::Phases phases(
+      benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
+
+    nnfw_session *session = nullptr;
+    NNPR_ENSURE_STATUS(nnfw_create_session(&session));
+
+    // ModelLoad
+    phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
+      if (args.useSingleModel())
+        NNPR_ENSURE_STATUS(
+          nnfw_load_model_from_modelfile(session, args.getModelFilename().c_str()));
+      else
+        NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, args.getPackageFilename().c_str()));
+    });
+
+    char *available_backends = std::getenv("BACKENDS");
+    if (available_backends)
+      NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
+
+    uint32_t num_inputs;
+    NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
+
+    // verify input and output
+
+    // Exits when any input has a dtype outside [FLOAT32, QUANT16_SYMM_SIGNED]
+    auto verifyInputTypes = [session]() {
+      uint32_t sz;
+      NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz));
+      for (uint32_t i = 0; i < sz; ++i)
+      {
+        nnfw_tensorinfo ti;
+        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
+
+        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
+        {
+          std::cerr << "E: not supported input type" << std::endl;
+          exit(-1);
+        }
+      }
+    };
+
+    // Exits when any output has a dtype outside [FLOAT32, QUANT16_SYMM_SIGNED]
+    auto verifyOutputTypes = [session]() {
+      uint32_t sz;
+      NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz));
+
+      for (uint32_t i = 0; i < sz; ++i)
+      {
+        nnfw_tensorinfo ti;
+        NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
+
+        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
+        {
+          std::cerr << "E: not supported output type" << std::endl;
+          exit(-1);
+        }
+      }
+    };
+
+    // Applies every shape of the map to the matching input, skipping inputs whose current
+    // shape already equals the requested one
+    auto setTensorInfo = [session](const TensorShapeMap &tensor_shape_map) {
+      for (const auto &tensor_shape : tensor_shape_map)
+      {
+        auto ind = tensor_shape.first;
+        auto &shape = tensor_shape.second;
+        nnfw_tensorinfo ti;
+        // to fill dtype
+        NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti));
+
+        // ti.dims is a fixed-size array: refuse shapes that would be written past its end
+        if (shape.size() > sizeof(ti.dims) / sizeof(ti.dims[0]))
+          throw std::runtime_error("rank of the requested input shape is too high");
+
+        bool set_input = false;
+        if (ti.rank != shape.size())
+        {
+          set_input = true;
+        }
+        else
+        {
+          for (int i = 0; i < ti.rank; i++)
+          {
+            if (ti.dims[i] != shape.at(i))
+            {
+              set_input = true;
+              break;
+            }
+          }
+        }
+        if (!set_input)
+          continue;
+
+        ti.rank = shape.size();
+        for (int i = 0; i < ti.rank; i++)
+          ti.dims[i] = shape.at(i);
+        NNPR_ENSURE_STATUS(nnfw_set_input_tensorinfo(session, ind, &ti));
+      }
+    };
+
+    verifyInputTypes();
+    verifyOutputTypes();
+
+// set input shape before compilation
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+
+    auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) {
+      assert(!h5_file.empty());
+      auto shapes = H5Formatter(session).readTensorShapes(h5_file);
+      overwriteShapeMap(shape_map, shapes);
+    };
+
+    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
+      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare());
+#endif
+    setTensorInfo(args.getShapeMapForPrepare());
+
+    // prepare execution
+
+    // TODO When nnfw_{prepare|run} are failed, can't catch the time
+    phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
+      NNPR_ENSURE_STATUS(nnfw_prepare(session));
+    });
+
+// set input shape after compilation and before execution
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN ||
+        (!args.getLoadFilename().empty() && !args.shapeParamProvided()))
+      fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun());
+#endif
+    setTensorInfo(args.getShapeMapForRun());
+
+    // prepare input
+    std::vector<Allocation> inputs(num_inputs);
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (!args.getLoadFilename().empty())
+      H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
+    else if (!args.getLoadRawFilename().empty())
+      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
+    else
+      RandomGenerator(session).generate(inputs);
+#else
+    if (!args.getLoadRawFilename().empty())
+      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
+    else
+      RandomGenerator(session).generate(inputs);
+#endif
+
+    // prepare output
+    uint32_t num_outputs = 0;
+    NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_outputs));
+    std::vector<Allocation> outputs(num_outputs);
+    auto output_sizes = args.getOutputSizes();
+    for (uint32_t i = 0; i < num_outputs; i++)
+    {
+      // Always query the tensor info: its dtype is passed to nnfw_set_output() even when
+      // the buffer size comes from args.getOutputSizes(). Querying it only in the
+      // "no override" branch left ti.dtype uninitialized in the other one.
+      nnfw_tensorinfo ti;
+      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
+
+      uint64_t output_size_in_bytes = 0;
+      {
+        auto found = output_sizes.find(i);
+        if (found == output_sizes.end())
+        {
+          output_size_in_bytes = bufsize_for(&ti);
+        }
+        else
+        {
+          output_size_in_bytes = found->second;
+        }
+      }
+      outputs[i].alloc(output_size_in_bytes);
+      NNPR_ENSURE_STATUS(
+        nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes));
+      NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST));
+    }
+
+    // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
+    // only warmup.
+    if (verbose == 0)
+    {
+      phases.run(
+        "WARMUP",
+        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+        args.getWarmupRuns());
+      phases.run(
+        "EXECUTE",
+        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+        args.getNumRuns(), true);
+    }
+    else
+    {
+      phases.run(
+        "WARMUP",
+        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+        [&](const benchmark::Phase &phase, uint32_t nth) {
+          std::cout << "... "
+                    << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
+                    << std::endl;
+        },
+        args.getWarmupRuns());
+      phases.run(
+        "EXECUTE",
+        [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); },
+        [&](const benchmark::Phase &phase, uint32_t nth) {
+          std::cout << "... "
+                    << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
+                    << std::endl;
+        },
+        args.getNumRuns(), true);
+    }
+
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    // dump output tensors
+    if (!args.getDumpFilename().empty())
+      H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
+#endif
+    if (!args.getDumpRawFilename().empty())
+      RawFormatter(session).dumpOutputs(args.getDumpRawFilename(), outputs);
+
+    NNPR_ENSURE_STATUS(nnfw_close_session(session));
+
+    // TODO Apply verbose level to result
+
+    // prepare result
+    benchmark::Result result(phases);
+
+    // to stdout
+    benchmark::printResult(result);
+
+    // to csv
+    if (args.getWriteReport() == false)
+      return 0;
+
+    // prepare csv task
+    std::string exec_basename;
+    std::string nnpkg_basename;
+    std::string backend_name = (available_backends) ? available_backends : default_backend_cand;
+    {
+      char buf[PATH_MAX];
+      char *res = args.useSingleModel() ? realpath(args.getModelFilename().c_str(), buf)
+                                        : realpath(args.getPackageFilename().c_str(), buf);
+      if (res)
+      {
+        nnpkg_basename = basename(buf);
+      }
+      else
+      {
+        std::cerr << "E: during getting realpath from nnpackage or model path." << std::endl;
+        exit(-1);
+      }
+      exec_basename = basename(argv[0]);
+    }
+
+    benchmark::writeResult(result, exec_basename, nnpkg_basename, backend_name);
+
+    return 0;
+  }
+  catch (const boost::program_options::error &e)
+  {
+    std::cerr << "E: " << e.what() << std::endl;
+    exit(-1);
+  }
+  catch (const std::runtime_error &e)
+  {
+    std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
+    exit(-1);
+  }
+}
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "randomgen.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+#include "misc/RandomGenerator.h"
+
+#include <iostream>
+
+namespace onert_run
+{
+
+// Fills `size` consecutive elements of type T, starting at `data`, with values drawn one
+// by one from `randgen`.
+template <class T> void randomData(nnfw::misc::RandomGenerator &randgen, void *data, uint64_t size)
+{
+  T *cursor = reinterpret_cast<T *>(data);
+  T *const last = cursor + size;
+  for (; cursor != last; ++cursor)
+    *cursor = randgen.generate<T>();
+}
+
+/**
+ * @brief Allocate every input buffer, fill it with generated values and bind it to the session
+ *
+ * For each entry of `inputs` the tensor info of the input with the same index is queried,
+ * a buffer of bufsize_for() bytes is allocated and filled element by element, and the
+ * buffer is registered with nnfw_set_input() using the NNFW_LAYOUT_CHANNELS_LAST layout.
+ * The generator is constructed with a constant seed on every call.
+ *
+ * @param inputs  One allocation per model input; (re)allocated by this function
+ *
+ * @note An unsupported dtype or a failing nnfw call terminates the process with exit
+ *       code -1.
+ */
+void RandomGenerator::generate(std::vector<Allocation> &inputs)
+{
+  // generate random data
+  const int seed = 1;
+  nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
+  for (uint32_t i = 0; i < inputs.size(); ++i)
+  {
+    nnfw_tensorinfo ti;
+    NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+    auto input_size_in_bytes = bufsize_for(&ti);
+    inputs[i].alloc(input_size_in_bytes);
+    const auto count = num_elems(&ti);
+    switch (ti.dtype)
+    {
+      case NNFW_TYPE_TENSOR_FLOAT32:
+        randomData<float>(randgen, inputs[i].data(), count);
+        break;
+      case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+      case NNFW_TYPE_TENSOR_UINT8:
+        randomData<uint8_t>(randgen, inputs[i].data(), count);
+        break;
+      case NNFW_TYPE_TENSOR_BOOL:
+        randomData<bool>(randgen, inputs[i].data(), count);
+        break;
+      case NNFW_TYPE_TENSOR_INT32:
+        randomData<int32_t>(randgen, inputs[i].data(), count);
+        break;
+      case NNFW_TYPE_TENSOR_INT64:
+        randomData<int64_t>(randgen, inputs[i].data(), count);
+        break;
+      case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+        // Signed 8-bit inputs are sized by bufsize_for() like every other type, so they
+        // must be generated here too instead of being rejected as unsupported
+        randomData<int8_t>(randgen, inputs[i].data(), count);
+        break;
+      case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+        randomData<int16_t>(randgen, inputs[i].data(), count);
+        break;
+      default:
+        std::cerr << "Not supported input type" << std::endl;
+        std::exit(-1);
+    }
+    NNPR_ENSURE_STATUS(
+      nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes));
+    NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+  }
+}
+
+} // end of namespace onert_run
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_RANDOMGEN_H__
+#define __ONERT_RUN_RANDOMGEN_H__
+
+#include <string>
+#include <vector>
+
+#include "allocation.h"
+
+struct nnfw_session;
+
+namespace onert_run
+{
+// Produces generated input data for every input of an nnfw session.
+class RandomGenerator
+{
+public:
+ // Stores the session pointer; the class does not create or close the session.
+ RandomGenerator(nnfw_session *sess) : session_(sess) {}
+ // Allocates inputs[i] for each entry, fills it with generated values and registers the
+ // buffer as input i of the session.
+ void generate(std::vector<Allocation> &inputs);
+
+private:
+ // Session whose input tensor info is queried and whose inputs are set
+ nnfw_session *session_;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_RANDOMGEN_H__
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rawformatter.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+
+namespace onert_run
+{
+/**
+ * @brief Load every model input from a raw binary file and bind it to the session
+ *
+ * Input i is read from the file "<filename>.<i>", whose size must equal the byte size of
+ * the input tensor. The data is registered with nnfw_set_input() using the
+ * NNFW_LAYOUT_CHANNELS_LAST layout.
+ *
+ * @param filename  Common prefix of the input files
+ * @param inputs    One allocation per model input; (re)allocated by this function
+ *
+ * @note A missing or unreadable file, a size mismatch or a failing nnfw call terminates
+ *       the process with exit code -1.
+ */
+void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
+{
+  uint32_t num_inputs;
+  NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+
+  // Support multiple inputs
+  // Option 1: Get comma-separated input file list like --load:raw a,b,c
+  // Option 2: Get prefix --load:raw in
+  //           Internally access in.0, in.1, in.2, ... in.{N-1} where N is determined by nnfw info
+  //           query api.
+  //
+  // Currently Option 2 is implemented.
+  try
+  {
+    // Indexing inputs[i] below must never run past the caller's vector
+    if (inputs.size() < num_inputs)
+      throw std::runtime_error("number of input buffers is less than number of model inputs.");
+
+    for (uint32_t i = 0; i < num_inputs; ++i)
+    {
+      nnfw_tensorinfo ti;
+      NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+      // allocate memory for data
+      auto bufsz = bufsize_for(&ti);
+      inputs[i].alloc(bufsz);
+
+      // Opened at the end (std::ios::ate) so that tellg() yields the file size
+      const std::string input_path = filename + "." + std::to_string(i);
+      std::ifstream file(input_path, std::ios::ate | std::ios::binary);
+      if (!file.is_open())
+        throw std::runtime_error("Cannot open input file " + input_path);
+
+      const std::streamoff filesz = file.tellg();
+      if (filesz < 0 || bufsz != static_cast<uint64_t>(filesz))
+      {
+        throw std::runtime_error("Input " + std::to_string(i) +
+                                 " size does not match: " + std::to_string(bufsz) +
+                                 " expected, but " + std::to_string(filesz) + " provided.");
+      }
+      file.seekg(0, std::ios::beg);
+      file.read(reinterpret_cast<char *>(inputs[i].data()), filesz);
+      // A short or failed read would otherwise feed a partially filled buffer to the model
+      if (!file)
+        throw std::runtime_error("Failed to read input file " + input_path);
+      file.close();
+
+      NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
+      NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+    }
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << e.what() << std::endl;
+    std::exit(-1);
+  }
+}
+
+/**
+ * @brief Write every model output to its own raw binary file
+ *
+ * Output i is written to "<filename>.<i>"; the number of bytes written is the byte size
+ * of the output tensor as reported by the session. The name of each generated file is
+ * printed to stderr.
+ *
+ * @param filename  Common prefix of the output files
+ * @param outputs   Output buffers indexed by output number; must hold at least as many
+ *                  entries as the session has outputs
+ *
+ * @note A file that cannot be created or written, or a failing nnfw call, terminates the
+ *       process with exit code -1.
+ */
+void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
+{
+  uint32_t num_outputs;
+  NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
+  try
+  {
+    // Indexing outputs[i] below must never run past the caller's vector
+    if (outputs.size() < num_outputs)
+      throw std::runtime_error("number of output buffers is less than number of model outputs.");
+
+    for (uint32_t i = 0; i < num_outputs; i++)
+    {
+      nnfw_tensorinfo ti;
+      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
+      auto bufsz = bufsize_for(&ti);
+
+      const std::string output_path = filename + "." + std::to_string(i);
+      std::ofstream file(output_path, std::ios::out | std::ios::binary);
+      if (!file.is_open())
+        throw std::runtime_error("Cannot open output file " + output_path);
+
+      file.write(reinterpret_cast<const char *>(outputs[i].data()), bufsz);
+      file.close();
+      // Streams do not throw by default: check the state after close() so that write and
+      // flush failures are reported instead of announcing a file that is incomplete
+      if (file.fail())
+        throw std::runtime_error("Failed to write output file " + output_path);
+
+      std::cerr << output_path + " is generated.\n";
+    }
+  }
+  catch (const std::runtime_error &e)
+  {
+    std::cerr << "Error during dumpOutputs on onert_run : " << e.what() << std::endl;
+    std::exit(-1);
+  }
+}
+} // end of namespace onert_run
--- /dev/null
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_RAWFORMATTER_H__
+#define __ONERT_RUN_RAWFORMATTER_H__
+
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
+#include <string>
+#include <vector>
+
+struct nnfw_session;
+
+namespace onert_run
+{
+// Formatter that exchanges tensor data with raw binary files, one file per tensor.
+class RawFormatter : public Formatter
+{
+public:
+ // Forwards the session to the Formatter base class.
+ RawFormatter(nnfw_session *sess) : Formatter(sess) {}
+ // Reads input i from the file "<filename>.<i>" into inputs[i] and registers the buffer
+ // with the session through nnfw_set_input().
+ void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+ // Writes outputs[i] to the file "<filename>.<i>" for every output of the session.
+ void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
+};
+} // namespace onert_run
+
+#endif // __ONERT_RUN_RAWFORMATTER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_RUN_TYPES_H__
+#define __ONERT_RUN_TYPES_H__
+
+// std::vector is used below; include it here so the header does not depend on the include
+// order of the files that use it
+#include <vector>
+
+namespace onert_run
+{
+
+// Shape of a tensor: one dimension per element, the number of elements being the rank
+using TensorShape = std::vector<int>;
+
+} // end of namespace onert_run
+
+#endif // __ONERT_RUN_TYPES_H__
+++ /dev/null
-if (NOT BUILD_TFLITE_BENCHMARK_MODEL)
- return()
-endif(NOT BUILD_TFLITE_BENCHMARK_MODEL)
-
-nnfw_find_package(TensorFlowLite EXACT 1.13.1 REQUIRED)
-
-# TODO Remove this target_compile_definitions command, and just check its presence.
-# This change is prerequisites on pre-built tensorflow-lite package support
-target_compile_definitions(tensorflow-lite PUBLIC "TFLITE_PROFILING_ENABLED")
-
-file(GLOB_RECURSE SOURCES "*.cc")
-
-nnas_find_package(TensorFlowSource EXACT 1.13.1 REQUIRED)
-set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/lite")
-list(APPEND SOURCES "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_main.cc"
- "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_model.cc"
- "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_params.cc"
- "${TENSORFLOW_LITE_BASE}/tools/benchmark/command_line_flags.cc")
-
-add_executable(tflite_benchmark_model ${SOURCES})
-target_compile_definitions(tflite_benchmark_model PUBLIC "TFLITE_PROFILING_ENABLED")
-target_link_libraries(tflite_benchmark_model nnfw_lib_misc nnfw_lib_tflite nnfw_lib_profiling)
-target_link_libraries(tflite_benchmark_model tensorflow-lite ${LIB_PTHREAD} dl)
-install(TARGETS tflite_benchmark_model DESTINATION bin)
+++ /dev/null
-# TFLite Model Benchmark Tool
-
-## Description
-
-A simple C++ binary to benchmark a TFLite model and its individual operators,
-both on desktop machines and on Android. The binary takes a TFLite model,
-generates random inputs and then repeatedly runs the model for specified number
-of runs. Aggregrate latency statistics are reported after running the benchmark.
-
-The instructions below are for running the binary on Desktop and Android,
-for iOS please use the
-[iOS benchmark app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/benchmark/ios).
-
-## Parameters
-
-The binary takes the following required parameters:
-
-* `graph`: `string` \
- The path to the TFLite model file.
-
-and the following optional parameters:
-
-* `num_threads`: `int` (default=1) \
- The number of threads to use for running TFLite interpreter.
-* `warmup_runs`: `int` (default=1) \
- The number of warmup runs to do before starting the benchmark.
-* `num_runs`: `int` (default=50) \
- The number of runs. Increase this to reduce variance.
-* `run_delay`: `float` (default=-1.0) \
- The delay in seconds between subsequent benchmark runs. Non-positive values
- mean use no delay.
-* `use_nnapi`: `bool` (default=false) \
- Whether to use [Android NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/).
- This API is available on recent Android devices.
-
-## To build/install/run
-
-### On Android:
-
-(0) Refer to https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android to edit the `WORKSPACE` to configure the android NDK/SDK.
-
-(1) Build for your specific platform, e.g.:
-
-```
-bazel build -c opt \
- --config=android_arm \
- --cxxopt='--std=c++11' \
- tensorflow/lite/tools/benchmark:benchmark_model
-```
-
-(2) Connect your phone. Push the binary to your phone with adb push
- (make the directory if required):
-
-```
-adb push bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model /data/local/tmp
-```
-
-(3) Make the binary executable.
-
-```
-adb shell chmod +x /data/local/tmp/benchmark_model
-```
-
-(4) Push the compute graph that you need to test. For example:
-
-```
-adb push mobilenet_quant_v1_224.tflite /data/local/tmp
-```
-
-(5) Run the benchmark. For example:
-
-```
-adb shell /data/local/tmp/benchmark_model \
- --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
- --num_threads=4
-```
-
-### On desktop:
-(1) build the binary
-
-```
-bazel build -c opt tensorflow/lite/tools/benchmark:benchmark_model
-```
-
-(2) Run on your compute graph, similar to the Android case but without the need of adb shell.
-For example:
-
-```
-bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model \
- --graph=mobilenet_quant_v1_224.tflite \
- --num_threads=4
-```
-
-The MobileNet graph used as an example here may be downloaded from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip).
-
-
-## Reducing variance between runs on Android.
-
-Most modern Android phones use [ARM big.LITTLE](https://en.wikipedia.org/wiki/ARM_big.LITTLE)
-architecture where some cores are more power hungry but faster than other cores.
-When running benchmarks on these phones there can be significant variance
-between different runs of the benchmark. One way to reduce variance between runs
-is to set the [CPU affinity](https://en.wikipedia.org/wiki/Processor_affinity)
-before running the benchmark. On Android this can be done using the `taskset`
-command.
-E.g. for running the benchmark on big cores on Pixel 2 with a single thread one
-can use the following command:
-
-```
-adb shell taskset f0 /data/local/tmp/benchmark_model \
- --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
- --num_threads=1
-```
-
-where `f0` is the affinity mask for big cores on Pixel 2.
-Note: The affinity mask varies with the device.
-
-## Profiling model operators
-The benchmark model binary also allows you to profile operators and give execution times of each operator. To do this,
-compile the binary with a compiler flag that enables profiling to be compiled in. Pass **--copt=-DTFLITE_PROFILING_ENABLED**
-to compile benchmark with profiling support.
-For example, to compile with profiling support on Android, add this flag to the previous command:
-
-```
-bazel build -c opt \
- --config=android_arm \
- --cxxopt='--std=c++11' \
- --copt=-DTFLITE_PROFILING_ENABLED \
- tensorflow/lite/tools/benchmark:benchmark_model
-```
-This compiles TFLite with profiling enabled, now you can run the benchmark binary like before. The binary will produce detailed statistics for each operation similar to those shown below:
-
-```
-
-============================== Run Order ==============================
- [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name]
- CONV_2D 0.000 4.269 4.269 0.107% 0.107% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6]
- DEPTHWISE_CONV_2D 4.270 2.150 2.150 0.054% 0.161% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6]
- CONV_2D 6.421 6.107 6.107 0.153% 0.314% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6]
- DEPTHWISE_CONV_2D 12.528 1.366 1.366 0.034% 0.348% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6]
- CONV_2D 13.895 4.195 4.195 0.105% 0.454% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6]
- DEPTHWISE_CONV_2D 18.091 1.260 1.260 0.032% 0.485% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6]
- CONV_2D 19.352 6.652 6.652 0.167% 0.652% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6]
- DEPTHWISE_CONV_2D 26.005 0.698 0.698 0.018% 0.670% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6]
- CONV_2D 26.703 3.344 3.344 0.084% 0.754% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6]
- DEPTHWISE_CONV_2D 30.047 0.646 0.646 0.016% 0.770% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6]
- CONV_2D 30.694 5.800 5.800 0.145% 0.915% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6]
- DEPTHWISE_CONV_2D 36.495 0.331 0.331 0.008% 0.924% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6]
- CONV_2D 36.826 2.838 2.838 0.071% 0.995% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6]
- DEPTHWISE_CONV_2D 39.665 0.439 0.439 0.011% 1.006% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6]
- CONV_2D 40.105 5.293 5.293 0.133% 1.139% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6]
- DEPTHWISE_CONV_2D 45.399 0.352 0.352 0.009% 1.147% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6]
- CONV_2D 45.752 5.322 5.322 0.133% 1.281% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6]
- DEPTHWISE_CONV_2D 51.075 0.357 0.357 0.009% 1.290% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6]
- CONV_2D 51.432 5.693 5.693 0.143% 1.433% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6]
- DEPTHWISE_CONV_2D 57.126 0.366 0.366 0.009% 1.442% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6]
- CONV_2D 57.493 5.472 5.472 0.137% 1.579% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6]
- DEPTHWISE_CONV_2D 62.966 0.364 0.364 0.009% 1.588% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6]
- CONV_2D 63.330 5.404 5.404 0.136% 1.724% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6]
- DEPTHWISE_CONV_2D 68.735 0.155 0.155 0.004% 1.728% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6]
- CONV_2D 68.891 2.970 2.970 0.074% 1.802% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6]
- DEPTHWISE_CONV_2D 71.862 0.206 0.206 0.005% 1.807% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6]
- CONV_2D 72.069 5.888 5.888 0.148% 1.955% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6]
- AVERAGE_POOL_2D 77.958 0.036 0.036 0.001% 1.956% 0.000 0 [MobilenetV1/Logits/AvgPool_1a/AvgPool]
- CONV_2D 77.994 1.445 1.445 0.036% 1.992% 0.000 0 [MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd]
- RESHAPE 79.440 0.002 0.002 0.000% 1.992% 0.000 0 [MobilenetV1/Predictions/Reshape]
- SOFTMAX 79.443 0.029 0.029 0.001% 1.993% 0.000 0 [MobilenetV1/Predictions/Softmax]
-
-============================== Top by Computation Time ==============================
- [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name]
- CONV_2D 19.352 6.652 6.652 0.167% 0.167% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6]
- CONV_2D 6.421 6.107 6.107 0.153% 0.320% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6]
- CONV_2D 72.069 5.888 5.888 0.148% 0.468% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6]
- CONV_2D 30.694 5.800 5.800 0.145% 0.613% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6]
- CONV_2D 51.432 5.693 5.693 0.143% 0.756% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6]
- CONV_2D 57.493 5.472 5.472 0.137% 0.893% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6]
- CONV_2D 63.330 5.404 5.404 0.136% 1.029% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6]
- CONV_2D 45.752 5.322 5.322 0.133% 1.162% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6]
- CONV_2D 40.105 5.293 5.293 0.133% 1.295% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6]
- CONV_2D 0.000 4.269 4.269 0.107% 1.402% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6]
-
-Number of nodes executed: 31
-============================== Summary by node type ==============================
- [Node type] [count] [avg ms] [avg %] [cdf %] [mem KB] [times called]
- CONV_2D 15 1.406 89.270% 89.270% 0.000 0
- DEPTHWISE_CONV_2D 13 0.169 10.730% 100.000% 0.000 0
- SOFTMAX 1 0.000 0.000% 100.000% 0.000 0
- RESHAPE 1 0.000 0.000% 100.000% 0.000 0
- AVERAGE_POOL_2D 1 0.000 0.000% 100.000% 0.000 0
-
-Timings (microseconds): count=50 first=79449 curr=81350 min=77385 max=88213 avg=79732 std=1929
-Memory (bytes): count=0
-31 nodes observed
-
-
-Average inference timings in us: Warmup: 83235, Init: 38467, no stats: 79760.9
-```
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"
-
-#include <cstdarg>
-#include <cstdlib>
-#include <iostream>
-#include <memory>
-#include <string>
-#include <unordered_set>
-#include <vector>
-
-#include "tflite/ext/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/op_resolver.h"
-#include "tensorflow/lite/string_util.h"
-#include "tensorflow/lite/tools/benchmark/logging.h"
-
-#ifdef GEMMLOWP_PROFILING
-#include "gemmlowp/profiling/profiler.h"
-#endif
-
-// For profiling nnapi_delegate
-#include "profiling/profiling.h"
-#include "tflite/ext/nnapi_delegate.h"
-
-namespace {
- nnfw::tflite::NNAPIDelegate nnfw_delegate_;
-}
-
-#ifdef TFLITE_CUSTOM_OPS_HEADER
-void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
-#endif
-
-namespace tflite {
-namespace benchmark {
-
-void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
- TFLITE_BENCHMARK_CHECK(interpreter);
- interpreter_ = interpreter;
- interpreter_->SetProfiler(&profiler_);
-}
-
-void ProfilingListener::OnSingleRunStart(RunType run_type) {
- if (run_type == REGULAR) {
- profiler_.Reset();
- profiler_.StartProfiling();
- }
-}
-
-void ProfilingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
- if (has_profiles_) {
- TFLITE_LOG(INFO) << summarizer_.GetOutputString();
- }
-}
-
-void ProfilingListener::OnSingleRunEnd() {
- profiler_.StopProfiling();
- auto profile_events = profiler_.GetProfileEvents();
- has_profiles_ = !profile_events.empty();
- summarizer_.ProcessProfiles(profile_events, *interpreter_);
-}
-
-void GemmlowpProfilingListener::OnBenchmarkStart(
- const BenchmarkParams& params) {
-#ifdef GEMMLOWP_PROFILING
- gemmlowp::RegisterCurrentThreadForProfiling();
- gemmlowp::StartProfiling();
-#endif
-}
-
-void GemmlowpProfilingListener::OnBenchmarkEnd(
- const BenchmarkResults& results) {
-#ifdef GEMMLOWP_PROFILING
- gemmlowp::FinishProfiling();
-#endif
-}
-
-namespace {
-
-std::vector<std::string> Split(const std::string& str, const char delim) {
- std::istringstream input(str);
- std::vector<std::string> results;
- std::string item;
- while (std::getline(input, item, delim)) {
- results.push_back(item);
- }
- return results;
-}
-
-template <typename T>
-bool SplitAndParse(const std::string& str, char delim, std::vector<T>* values) {
- std::istringstream input(str);
- bool first = true;
- while (!input.eof()) {
- if (!first) {
- char c;
- input >> c;
- if (c != delim) {
- return false;
- }
- } else {
- first = false;
- }
- T val;
- input >> val;
- if (!input.eof() && !input.good()) {
- return false;
- }
- values->push_back(val);
- }
- return true;
-}
-
-template <typename T>
-void FillRandomValue(T* ptr, const std::vector<int>& sizes,
- const std::function<T()>& random_func) {
- int num_elements = 1;
- for (int dim : sizes) {
- num_elements *= dim;
- }
- for (int i = 0; i < num_elements; ++i) {
- *ptr++ = random_func();
- }
-}
-
-void FillRandomString(tflite::DynamicBuffer* buffer,
- const std::vector<int>& sizes,
- const std::function<string()>& random_func) {
- int num_elements = 1;
- for (int dim : sizes) {
- num_elements *= dim;
- }
- for (int i = 0; i < num_elements; ++i) {
- auto str = random_func();
- buffer->AddString(str.data(), str.length());
- }
-}
-
-bool PopulateInputLayerInfo(
- const string& names_string, const string& shapes_string,
- std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
- std::vector<std::string> names = Split(names_string, ',');
- std::vector<std::string> shapes = Split(shapes_string, ':');
-
- if (names.size() != shapes.size()) {
- TFLITE_LOG(ERROR) << "The number of items in"
- << " --input_layer_shape (" << shapes_string << ", with "
- << shapes.size() << " items)"
- << " must match the number of items in"
- << " --input_layer (" << names_string << ", with "
- << names.size() << " items)."
- << " For example --input_layer=input1,input2"
- << " --input_layer_shape=1,224,224,4:1,20";
- return false;
- }
-
- for (int i = 0; i < names.size(); ++i) {
- info->push_back(BenchmarkTfLiteModel::InputLayerInfo());
- BenchmarkTfLiteModel::InputLayerInfo& input = info->back();
-
- input.name = names[i];
-
- TFLITE_BENCHMARK_CHECK(SplitAndParse(shapes[i], ',', &input.shape))
- << "Incorrect size string specified: " << shapes[i];
- for (int dim : input.shape) {
- if (dim == -1) {
- TFLITE_LOG(ERROR)
- << "Any unknown sizes in the shapes (-1's) must be replaced"
- << " with the size you want to benchmark with.";
- return false;
- }
- }
- }
-
- return true;
-}
-
-std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray* int_array) {
- std::vector<int> values;
- values.reserve(int_array->size);
- for (size_t i = 0; i < int_array->size; i++) {
- values.push_back(int_array->data[i]);
- }
- return values;
-}
-
-} // namespace
-
-BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
- BenchmarkParams default_params = BenchmarkModel::DefaultParams();
- default_params.AddParam("graph", BenchmarkParam::Create<std::string>(""));
- default_params.AddParam("input_layer",
- BenchmarkParam::Create<std::string>(""));
- default_params.AddParam("input_layer_shape",
- BenchmarkParam::Create<std::string>(""));
- default_params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(false));
- return default_params;
-}
-
-BenchmarkTfLiteModel::BenchmarkTfLiteModel()
- : BenchmarkTfLiteModel(DefaultParams()) {}
-
-BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
- : BenchmarkModel(std::move(params)) {
- AddListener(&profiling_listener_);
- AddListener(&gemmlowp_profiling_listener_);
-}
-
-std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
- std::vector<Flag> flags = BenchmarkTfLiteModel::BenchmarkModel::GetFlags();
- std::vector<Flag> specific_flags = {
- CreateFlag<std::string>("graph", ¶ms_, "graph file name"),
- CreateFlag<std::string>("input_layer", ¶ms_, "input layer names"),
- CreateFlag<std::string>("input_layer_shape", ¶ms_,
- "input layer shape"),
- CreateFlag<bool>("use_nnapi", ¶ms_, "use nnapi api")};
-
- flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
- return flags;
-}
-
-void BenchmarkTfLiteModel::LogParams() {
- BenchmarkModel::LogParams();
- TFLITE_LOG(INFO) << "Graph: [" << params_.Get<std::string>("graph") << "]";
- TFLITE_LOG(INFO) << "Input layers: ["
- << params_.Get<std::string>("input_layer") << "]";
- TFLITE_LOG(INFO) << "Input shapes: ["
- << params_.Get<std::string>("input_layer_shape") << "]";
- TFLITE_LOG(INFO) << "Use nnapi : [" << params_.Get<bool>("use_nnapi") << "]";
-}
-
-bool BenchmarkTfLiteModel::ValidateParams() {
- if (params_.Get<std::string>("graph").empty()) {
- TFLITE_LOG(ERROR)
- << "Please specify the name of your TF Lite input file with --graph";
- return false;
- }
- return PopulateInputLayerInfo(params_.Get<std::string>("input_layer"),
- params_.Get<std::string>("input_layer_shape"),
- &inputs);
-}
-
-uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
- TFLITE_BENCHMARK_CHECK(interpreter);
- uint64_t total_input_bytes = 0;
- for (int input : interpreter->inputs()) {
- auto* t = interpreter->tensor(input);
- total_input_bytes += t->bytes;
- }
- return total_input_bytes;
-}
-
-void BenchmarkTfLiteModel::PrepareInputsAndOutputs() {
- auto interpreter_inputs = interpreter->inputs();
- // Set the values of the input tensors.
- for (int j = 0; j < interpreter_inputs.size(); ++j) {
- int i = interpreter_inputs[j];
- TfLiteTensor* t = interpreter->tensor(i);
- std::vector<int> sizes = TfLiteIntArrayToVector(t->dims);
- // TODO(ahentz): below we ignore the O-th dimension (number of batches).
- if (t->type == kTfLiteFloat32) {
- FillRandomValue<float>(
- interpreter->typed_tensor<float>(i),
- std::vector<int>(sizes.begin() + 1, sizes.end()),
- []() { return static_cast<float>(rand()) / RAND_MAX - 0.5f; });
- } else if (t->type == kTfLiteInt32) {
- // TODO(yunluli): This is currently only used for handling embedding input
- // for speech models. Generalize if necessary.
- FillRandomValue<int32_t>(
- interpreter->typed_tensor<int32_t>(i),
- std::vector<int32_t>(sizes.begin() + 1, sizes.end()),
- []() { return static_cast<int32_t>(rand()) % 100; });
- } else if (t->type == kTfLiteUInt8) {
- FillRandomValue<uint8_t>(
- interpreter->typed_tensor<uint8_t>(i),
- std::vector<int>(sizes.begin() + 1, sizes.end()),
- []() { return static_cast<uint8_t>(rand()) % 255; });
- } else if (t->type == kTfLiteInt8) {
- FillRandomValue<int8_t>(
- interpreter->typed_tensor<int8_t>(i),
- std::vector<int>(sizes.begin() + 1, sizes.end()),
- []() { return static_cast<int8_t>(rand()) % 255 - 127; });
- } else if (t->type == kTfLiteString) {
- tflite::DynamicBuffer buffer;
- FillRandomString(&buffer, sizes, []() {
- return "we're have some friends over saturday to hang out in the yard";
- });
- buffer.WriteToTensor(interpreter->tensor(i), /*new_shape=*/nullptr);
- } else {
- TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name
- << " of type " << t->type;
- }
- }
-}
-
-void BenchmarkTfLiteModel::Init() {
- std::string graph = params_.Get<std::string>("graph");
- model = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
- if (!model) {
- TFLITE_LOG(FATAL) << "Failed to mmap model " << graph;
- }
- TFLITE_LOG(INFO) << "Loaded model " << graph;
- model->error_reporter();
- TFLITE_LOG(INFO) << "resolved reporter";
-
-#ifdef TFLITE_CUSTOM_OPS_HEADER
- tflite::MutableOpResolver resolver;
- RegisterSelectedOps(&resolver);
-#else
- nnfw::tflite::BuiltinOpResolver resolver;
-#endif
-
- tflite::InterpreterBuilder(*model, resolver)(&interpreter);
- if (!interpreter) {
- TFLITE_LOG(FATAL) << "Failed to construct interpreter";
- }
- profiling_listener_.SetInterpreter(interpreter.get());
- ::profiling::Context::get().setProfiler(interpreter->GetProfiler());
-
- auto enable_sync = std::getenv("PROFILING_OP_SYNC");
- if (enable_sync && std::strtol(enable_sync, NULL, 0) != 0)
- {
- ::profiling::Context::get().setSync();
- }
-
- const int32_t num_threads = params_.Get<int32_t>("num_threads");
-
- if (num_threads != -1) {
- interpreter->SetNumThreads(num_threads);
- }
-
- bool use_nnapi = params_.Get<bool>("use_nnapi");
-
- interpreter->UseNNAPI(use_nnapi);
- if (use_nnapi) {
- if (nnfw_delegate_.BuildGraph(&(interpreter.get()->primary_subgraph())) != kTfLiteOk) {
- TFLITE_LOG(FATAL) << "Failed to BuildGraph!";
- }
- }
- ApplyDelegates();
-
- auto interpreter_inputs = interpreter->inputs();
-
- if (!inputs.empty()) {
- TFLITE_BENCHMARK_CHECK_EQ(inputs.size(), interpreter_inputs.size())
- << "Inputs mismatch: Model inputs #:" << interpreter_inputs.size()
- << " expected: " << inputs.size();
- }
-
- // TFLITE_BENCHMARK_CHECK that all names and types match
- for (int j = 0; j < inputs.size(); ++j) {
- const InputLayerInfo& input = inputs[j];
- int i = interpreter_inputs[j];
- TfLiteTensor* t = interpreter->tensor(i);
- TFLITE_BENCHMARK_CHECK_EQ(t->name, input.name)
- << "Tensor # " << i << " is named " << t->name << " but flags call it "
- << input.name;
- }
-
- // Resize all non-string tensors.
- for (int j = 0; j < inputs.size(); ++j) {
- const InputLayerInfo& input = inputs[j];
- int i = interpreter_inputs[j];
- TfLiteTensor* t = interpreter->tensor(i);
- if (t->type != kTfLiteString) {
- interpreter->ResizeInputTensor(i, input.shape);
- }
- }
-
- if (interpreter->AllocateTensors() != kTfLiteOk) {
- TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
- }
-}
-
-void BenchmarkTfLiteModel::RunImpl() {
- bool use_nnapi = params_.Get<bool>("use_nnapi");
- if (use_nnapi) {
- if (nnfw_delegate_.Invoke(&interpreter->primary_subgraph()) != kTfLiteOk) {
- TFLITE_LOG(FATAL) << "Failed to invoke!";
- }
- } else {
- if (interpreter->Invoke() != kTfLiteOk) {
- TFLITE_LOG(FATAL) << "Failed to invoke!";
- }
- }
-}
-
-} // namespace benchmark
-} // namespace tflite
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/lite/profiling/profile_summarizer.h"
-
-#include <sstream>
-
-#include "tensorflow/lite/schema/schema_generated.h"
-
-namespace tflite {
-namespace profiling {
-namespace {
-
-struct OperatorDetails {
- std::string name;
- std::vector<std::string> inputs;
- std::vector<std::string> outputs;
-};
-
-std::string GetTensorName(const tflite::Interpreter& interpreter,
- int tensor_index) {
- const auto tensor = interpreter.tensor(tensor_index);
- if (tensor == nullptr || tensor->name == nullptr) {
- return "Unknown";
- }
- return tensor->name;
-}
-std::vector<std::string> GetTensorNames(const tflite::Interpreter& interpreter,
- const TfLiteIntArray* tensor_indices) {
- std::vector<std::string> tensors;
- tensors.reserve(tensor_indices->size);
- for (int i = 0; i < tensor_indices->size; i++) {
- tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i]));
- }
- return tensors;
-}
-
-std::string ToString(const std::vector<std::string>& str_vector) {
- std::stringstream stream;
- stream << "[";
- bool first = true;
- for (const auto& s : str_vector) {
- if (!first) {
- stream << ", ";
- } else {
- first = false;
- }
- stream << s;
- }
- stream << "]";
- return stream.str();
-}
-
-OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
- int node_index) {
- auto node_reg = interpreter.node_and_registration(node_index);
- auto inputs = node_reg->first.inputs;
- auto outputs = node_reg->first.outputs;
- int code = node_reg->second.builtin_code;
- const char* op_name = nullptr;
- if (code == tflite::BuiltinOperator_CUSTOM) {
- const char* custom_name = node_reg->second.custom_name;
- op_name = custom_name ? custom_name : "UnknownCustomOp";
- } else {
- op_name = tflite::EnumNamesBuiltinOperator()[code];
- }
- const char* profiling_string =
- interpreter.OpProfilingString(node_reg->second, &node_reg->first);
- OperatorDetails details;
- details.name = op_name;
- if (profiling_string) {
- details.name += ":" + std::string(profiling_string);
- }
- details.inputs = GetTensorNames(interpreter, inputs);
- details.outputs = GetTensorNames(interpreter, outputs);
- return details;
-}
-
-tensorflow::StatSummarizerOptions GetProfileSummarizerOptions() {
- auto options = tensorflow::StatSummarizerOptions();
- options.show_summary = true;
- options.show_memory = false;
- return options;
-}
-
-} // namespace
-
-ProfileSummarizer::ProfileSummarizer()
- : stats_calculator_(
- new ::tensorflow::StatsCalculator(GetProfileSummarizerOptions())) {}
-
-void ProfileSummarizer::ProcessProfiles(
- const std::vector<const ProfileEvent*>& profile_stats,
- const tflite::Interpreter& interpreter) {
- std::vector<const ProfileEvent*> events;
- std::copy_if(profile_stats.begin(), profile_stats.end(),
- std::back_inserter(events), [](const ProfileEvent* e) {
- return e->event_type ==
- ProfileEvent::EventType::OPERATOR_INVOKE_EVENT &&
- e->end_timestamp_us >= e->begin_timestamp_us;
- });
- // Sort with begin_time.
- std::sort(events.begin(), events.end(),
- [](const ProfileEvent* const& a, const ProfileEvent* const& b) {
- return a->begin_timestamp_us < b->begin_timestamp_us;
- });
- if (events.empty()) {
- return;
- }
-
- int64_t base_start_us = events[0]->begin_timestamp_us;
- int node_num = 0;
- int64_t curr_total_us = 0;
- int prev_op_idx = -1;
- int child_op_no = 1;
- for (auto event : events) {
- auto op_details = GetOperatorDetails(interpreter, event->event_metadata);
- bool from_same_op = (prev_op_idx == event->event_metadata);
- child_op_no = from_same_op ? child_op_no + 1 : 1;
- auto node_name = ToString(op_details.outputs) + "#" + std::to_string(child_op_no);
- int64_t start_us = event->begin_timestamp_us - base_start_us;
- int64_t node_exec_time =
- event->end_timestamp_us - event->begin_timestamp_us;
- stats_calculator_->AddNodeStats(node_name, op_details.name, node_num,
- start_us, node_exec_time, 0 /*memory */);
- curr_total_us += node_exec_time;
- ++node_num;
- prev_op_idx = event->event_metadata;
- }
- stats_calculator_->UpdateRunTotalUs(curr_total_us);
-}
-} // namespace profiling
-} // namespace tflite
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/util/stats_calculator.h"
-
-#include <iomanip>
-#include <map>
-#include <queue>
-#include <sstream>
-#include <string>
-#include <algorithm>
-
-namespace tensorflow {
-
-StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
- : options_(options) {}
-
-std::string StatsCalculator::GetShortSummary() const {
- std::stringstream stream;
- stream << "Timings (microseconds): ";
- run_total_us_.OutputToStream(&stream);
- stream << std::endl;
-
- stream << "Memory (bytes): ";
- memory_.OutputToStream(&stream);
- stream << std::endl;
-
- stream << details_.size() << " nodes observed" << std::endl;
- return stream.str();
-}
-
-std::ostream& InitField(std::ostream& stream, int width) {
- stream << "\t" << std::right << std::setw(width) << std::fixed
- << std::setprecision(3);
- return stream;
-}
-
-std::string StatsCalculator::HeaderString(const std::string& title) const {
- std::stringstream stream;
-
- stream << "============================== " << title
- << " ==============================" << std::endl;
-
- InitField(stream, 24) << "[node type]";
- InitField(stream, 9) << "[start]";
- InitField(stream, 9) << "[first]";
- InitField(stream, 9) << "[avg ms]";
- InitField(stream, 8) << "[%]";
- InitField(stream, 8) << "[cdf%]";
- InitField(stream, 10) << "[mem KB]";
- InitField(stream, 9) << "[times called]";
- stream << "\t"
- << "[Name]";
- return stream.str();
-}
-
-std::string StatsCalculator::ColumnString(const Detail& detail,
- const int64_t cumulative_stat_on_node,
- const Stat<int64_t>& stat) const {
- const double start_ms = detail.start_us.avg() / 1000.0;
- const double first_time_ms = detail.rel_end_us.first() / 1000.0;
- const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
- const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
- const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
- const int64_t times_called = detail.times_called / num_runs();
-
- std::stringstream stream;
- InitField(stream, 24) << detail.type;
- InitField(stream, 9) << start_ms;
- InitField(stream, 9) << first_time_ms;
- InitField(stream, 9) << avg_time_ms;
- InitField(stream, 7) << percentage << "%";
- InitField(stream, 7) << cdf_percentage << "%";
- InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
- InitField(stream, 9) << times_called;
- stream << "\t" << detail.name;
-
- return stream.str();
-}
-
-void StatsCalculator::OrderNodesByMetric(
- SortingMetric metric, std::vector<const Detail*>* details) const {
- std::priority_queue<std::pair<std::string, const Detail*>> sorted_list;
- const int num_nodes = details_.size();
-
- for (const auto& det : details_) {
- const Detail* detail = &(det.second);
- std::stringstream stream;
- stream << std::setw(20) << std::right << std::setprecision(10)
- << std::fixed;
-
- switch (metric) {
- case BY_NAME:
- stream << detail->name;
- break;
- case BY_RUN_ORDER:
- stream << num_nodes - detail->run_order;
- break;
- case BY_TIME:
- stream << detail->rel_end_us.avg();
- break;
- case BY_MEMORY:
- stream << detail->mem_used.avg();
- break;
- case BY_TYPE:
- stream << detail->type;
- break;
- default:
- stream << "";
- break;
- }
-
- sorted_list.emplace(stream.str(), detail);
- }
-
- while (!sorted_list.empty()) {
- auto entry = sorted_list.top();
- sorted_list.pop();
- details->push_back(entry.second);
- }
-}
-
-void StatsCalculator::ComputeStatsByType(
- std::map<std::string, int64_t>* node_type_map_count,
- std::map<std::string, int64_t>* node_type_map_time,
- std::map<std::string, int64_t>* node_type_map_memory,
- std::map<std::string, int64_t>* node_type_map_times_called,
- int64_t* accumulated_us) const {
- int64_t run_count = run_total_us_.count();
-
- for (const auto& det : details_) {
- const std::string node_name = det.first;
- const Detail& detail = det.second;
-
- int64_t curr_time_val =
- static_cast<int64_t>(detail.rel_end_us.sum() / run_count);
- *accumulated_us += curr_time_val;
-
- int64_t curr_memory_val = detail.mem_used.newest();
-
- const std::string& node_type = detail.type;
-
- const std::string sharp1("#1");
- bool first = std::mismatch(sharp1.rbegin(), sharp1.rend(), node_name.rbegin()).first == sharp1.rend();
-
- if (first) {
- (*node_type_map_count)[node_type] += 1;
- (*node_type_map_times_called)[node_type] += detail.times_called / run_count;
- }
- (*node_type_map_time)[node_type] += curr_time_val;
- (*node_type_map_memory)[node_type] += curr_memory_val;
- }
-}
-
-std::string StatsCalculator::GetStatsByNodeType() const {
- std::stringstream stream;
-
- stream << "Number of nodes executed: " << details_.size() << std::endl;
-
- stream << "============================== Summary by node type "
- "=============================="
- << std::endl;
-
- std::map<std::string, int64_t> node_type_map_count;
- std::map<std::string, int64_t> node_type_map_time;
- std::map<std::string, int64_t> node_type_map_memory;
- std::map<std::string, int64_t> node_type_map_times_called;
- int64_t accumulated_us = 0;
-
- ComputeStatsByType(&node_type_map_count, &node_type_map_time,
- &node_type_map_memory, &node_type_map_times_called,
- &accumulated_us);
-
- // Sort them.
- std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>>
- timings;
- for (const auto& node_type : node_type_map_time) {
- const int64_t mem_used = node_type_map_memory[node_type.first];
- timings.emplace(node_type.second,
- std::pair<std::string, int64_t>(node_type.first, mem_used));
- }
-
- InitField(stream, 24) << "[Node type]";
- InitField(stream, 9) << "[count]";
- InitField(stream, 10) << "[avg ms]";
- InitField(stream, 11) << "[avg %]";
- InitField(stream, 11) << "[cdf %]";
- InitField(stream, 10) << "[mem KB]";
- InitField(stream, 10) << "[times called]";
- stream << std::endl;
-
- float cdf = 0.0f;
- while (!timings.empty()) {
- auto entry = timings.top();
- timings.pop();
-
- const std::string node_type = entry.second.first;
- const float memory = entry.second.second / 1000.0f;
-
- const int64_t node_type_total_us = entry.first;
- const float time_per_run_ms = node_type_total_us / 1000.0f;
-
- const float percentage =
- ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
- cdf += percentage;
-
- InitField(stream, 24) << node_type;
- InitField(stream, 9) << node_type_map_count[node_type];
- InitField(stream, 10) << time_per_run_ms;
- InitField(stream, 10) << percentage << "%";
- InitField(stream, 10) << cdf << "%";
- InitField(stream, 10) << memory;
- InitField(stream, 9) << node_type_map_times_called[node_type];
- stream << std::endl;
- }
- stream << std::endl;
- return stream.str();
-}
-
-std::string StatsCalculator::GetStatsByMetric(const std::string& title,
- SortingMetric sorting_metric,
- int num_stats) const {
- std::vector<const Detail*> details;
- OrderNodesByMetric(sorting_metric, &details);
-
- double cumulative_stat_on_node = 0;
-
- std::stringstream stream;
- stream << HeaderString(title) << std::endl;
- int stat_num = 0;
- for (auto detail : details) {
- ++stat_num;
- if (num_stats > 0 && stat_num > num_stats) {
- break;
- }
-
- // TODO(andrewharp): Make this keep track of the particular metric for cdf.
- cumulative_stat_on_node += detail->rel_end_us.sum();
- stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
- << std::endl;
- }
- stream << std::endl;
- return stream.str();
-}
-
-std::string StatsCalculator::GetOutputString() const {
- std::stringstream stream;
- if (options_.show_run_order) {
- stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
- options_.run_order_limit);
- }
- if (options_.show_time) {
- stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
- options_.time_limit);
- }
- if (options_.show_memory) {
- stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
- options_.memory_limit);
- }
- if (options_.show_type) {
- stream << GetStatsByNodeType();
- }
- if (options_.show_summary) {
- stream << GetShortSummary() << std::endl;
- }
- return stream.str();
-}
-
-void StatsCalculator::AddNodeStats(const std::string& name,
- const std::string& type, int64_t run_order,
- int64_t start_us, int64_t rel_end_us,
- int64_t mem_used) {
- Detail* detail = nullptr;
- if (details_.find(name) == details_.end()) {
- details_.insert({name, {}});
- detail = &details_.at(name);
- detail->type = type;
- detail->name = name;
- detail->run_order = run_order;
- } else {
- detail = &details_.at(name);
- }
- detail->start_us.UpdateStat(start_us);
- detail->rel_end_us.UpdateStat(rel_end_us);
- detail->mem_used.UpdateStat(mem_used);
- detail->times_called++;
-}
-
-} // namespace tensorflow
#include <tflite/Assert.h>
#include <tflite/InterpreterSession.h>
-#include <tflite/interp/FlatBufferBuilder.h>
-#include <iostream>
#include <fstream>
+#include <iostream>
#include <memory>
+#include <string>
const int RUN_FAILED = 1;
-using namespace tflite;
using namespace nnfw::tflite;
const int FILE_ERROR = 2;
}
// Read vector of floats from selected file
-void readData(const string &path, std::vector<uint8_t> &dest)
+void readData(const std::string &path, std::vector<uint8_t> &dest)
{
std::ifstream in(path);
if (!in.good())
}
+// Compares an integer-typed reference buffer against the actual output bytes, element by element.
+// An element mismatches when |ref - act| exceeds the TOLERANCE environment value (default 0).
+// Every mismatch is reported on stderr; returns true only when no element mismatched.
template <typename T>
-bool compareBuffersExact(const T *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
+bool isClose(const T *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
{
+  // TODO better way for handling quant error?
+  // Clamp a negative TOLERANCE to 0: casting it straight to uint64_t would wrap to a huge
+  // value and silently accept every mismatch.
+  const auto tolerance_env = nnfw::misc::EnvVar("TOLERANCE").asInt(0);
+  const auto tolerance = static_cast<uint64_t>(tolerance_env > 0 ? tolerance_env : 0);
bool match = true;
+
for (uint32_t e = 0; e < act_buf.size() / sizeof(T); e++)
{
T ref = ref_buf[e];
T act = reinterpret_cast<const T *>(act_buf.data())[e];
+    // Subtract in uint64_t: unsigned arithmetic wraps, so this yields the exact magnitude even
+    // where `ref - act` would overflow a signed int32_t/int64_t (undefined behavior).
+    const uint64_t diff = (ref > act) ? (static_cast<uint64_t>(ref) - static_cast<uint64_t>(act))
+                                      : (static_cast<uint64_t>(act) - static_cast<uint64_t>(ref));
- if (ref != act)
+    // diff > tolerance >= 0 already implies ref != act
+    if (diff > tolerance)
{
std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
- << ", act: " << act << std::endl;
+              << ", act: " << act << " (diff: " << diff << ")" << std::endl;
match = false;
}
}
return match;
}
-bool compareBuffersExactBool(const uint8_t *ref_buf, const std::vector<uint8_t> &act_buf,
- uint32_t index)
+// Float specialization of isClose. An element is accepted when either
+// nnfw::misc::fp32::absolute_epsilon_equal(ref, act) or
+// nnfw::misc::fp32::epsilon_equal(ref, act, tolerance) holds, where tolerance is the
+// TOLERANCE environment value (default 1). Each rejected element is reported on stderr
+// together with its absolute difference. Returns true when every element was accepted.
+template <>
+bool isClose<float>(const float *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
+{
+  const uint32_t tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
+  const auto *act_data = reinterpret_cast<const float *>(act_buf.data());
+  const auto elem_count = act_buf.size() / sizeof(float);
+  bool all_match = true;
+
+  for (uint32_t e = 0; e < elem_count; e++)
+  {
+    const float ref = ref_buf[e];
+    const float act = act_data[e];
+
+    // The absolute check is tried first; the tolerance-based check runs only if it fails.
+    if (nnfw::misc::fp32::absolute_epsilon_equal(ref, act) ||
+        nnfw::misc::fp32::epsilon_equal(ref, act, tolerance))
+    {
+      continue;
+    }
+
+    const float diff = std::fabs(ref - act);
+    std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref
+              << ", act: " << act << " (diff: " << diff << ")" << std::endl;
+    all_match = false;
+  }
+
+  return all_match;
+}
+
+bool exact(const uint8_t *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
{
bool match = true;
for (uint32_t e = 0; e < act_buf.size() / sizeof(uint8_t); e++)
// Compare with tflite
std::cout << "[Comparison] Stage start!" << std::endl;
// Read tflite model
- StderrReporter error_reporter;
- auto model = FlatBufferModel::BuildFromFile(tflite_file.c_str(), &error_reporter);
- auto builder = FlatBufferBuilder(*model);
+  // The TFLite C API reports failure by returning null instead of throwing, so check each
+  // creation step and exit with FILE_ERROR, as the removed builder-based code did.
+  auto model = TfLiteModelCreateFromFile(tflite_file.c_str());
+  if (model == nullptr)
+  {
+    std::cerr << "Failed to load tflite model: " << tflite_file << std::endl;
+    exit(FILE_ERROR);
+  }
+  auto options = TfLiteInterpreterOptionsCreate();
+  TfLiteInterpreterOptionsSetNumThreads(options, nnfw::misc::EnvVar("THREAD").asInt(1));
+  auto interpreter = TfLiteInterpreterCreate(model, options);
+  if (interpreter == nullptr)
+  {
+    std::cerr << "Failed to create tflite interpreter" << std::endl;
+    exit(FILE_ERROR);
+  }
- std::unique_ptr<Interpreter> interpreter;
- try
- {
- interpreter = builder.build();
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << std::endl;
- exit(FILE_ERROR);
- }
- interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(1));
-
- auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
+ auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter);
sess->prepare();
// Set input and run
for (uint32_t i = 0; i < num_inputs; i++)
{
- auto input_tensor = interpreter->tensor(interpreter->inputs().at(i));
- memcpy(input_tensor->data.uint8, inputs[i].data(), inputs[i].size());
+ auto input_tensor = TfLiteInterpreterGetInputTensor(interpreter, i);
+ memcpy(TfLiteTensorData(input_tensor), inputs[i].data(), inputs[i].size());
}
if (!sess->run())
{
}
std::cout << "[Comparison] TFLite run done!" << std::endl;
- // Calculate max difference over all outputs
- float max_float_difference = 0.0f;
bool find_unmatched_output = false;
- auto tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1);
for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++)
{
bool matched = true;
// Check output tensor values
-
- const auto &ref_output = interpreter->tensor(interpreter->outputs().at(out_idx))->data;
+ auto output_tensor = TfLiteInterpreterGetOutputTensor(interpreter, out_idx);
+ auto ref_output = TfLiteTensorData(output_tensor);
const auto &output = outputs[out_idx];
switch (ti.dtype)
{
case NNFW_TYPE_TENSOR_BOOL:
- matched = compareBuffersExactBool(ref_output.uint8, output, out_idx);
+ matched = exact(reinterpret_cast<uint8_t *>(ref_output), output, out_idx);
break;
case NNFW_TYPE_TENSOR_UINT8:
case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
- matched = compareBuffersExact<uint8_t>(ref_output.uint8, output, out_idx);
+ matched = isClose<uint8_t>(reinterpret_cast<uint8_t *>(ref_output), output, out_idx);
break;
case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
- matched = compareBuffersExact<int8_t>(ref_output.int8, output, out_idx);
+ matched = isClose<int8_t>(reinterpret_cast<int8_t *>(ref_output), output, out_idx);
break;
case NNFW_TYPE_TENSOR_INT32:
- matched = compareBuffersExact<int32_t>(ref_output.i32, output, out_idx);
+ matched = isClose<int32_t>(reinterpret_cast<int32_t *>(ref_output), output, out_idx);
break;
case NNFW_TYPE_TENSOR_FLOAT32:
- // TODO better way for handling FP error?
- for (uint32_t e = 0; e < num_elems(&ti); e++)
- {
- float refval = ref_output.f[e];
- float val = reinterpret_cast<const float *>(output.data())[e];
- if (std::abs(refval - val) > max_float_difference)
- max_float_difference = std::abs(refval - val);
-
- matched = nnfw::misc::fp32::absolute_epsilon_equal(refval, val)
- ? true
- : nnfw::misc::fp32::epsilon_equal(refval, val, tolerance);
- }
+ matched = isClose<float>(reinterpret_cast<float *>(ref_output), output, out_idx);
break;
case NNFW_TYPE_TENSOR_INT64:
- matched = compareBuffersExact<int64_t>(ref_output.i64, output, out_idx);
+ matched = isClose<int64_t>(reinterpret_cast<int64_t *>(ref_output), output, out_idx);
break;
default:
throw std::runtime_error{"Invalid tensor type"};
}
// Print results
- std::cout << "[Comparison] Max float difference: " << max_float_difference << std::endl;
int ret = 0;
if (find_unmatched_output)
{
## Link test executable against gtest & gtest_main
target_link_libraries(tflite_test gtest gtest_main ${LIB_PTHREAD})
## install test binary for packaging
-install(TARGETS tflite_test DESTINATION unittest_standalone)
+install(TARGETS tflite_test DESTINATION unittest)
#include <iostream>
#include <cstring>
-#include "tensorflow/lite/interpreter.h"
+#include <tensorflow/lite/c/c_api.h>
namespace TFLiteRun
{
// DO NOTHING
}
-void TensorDumper::addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices)
+void TensorDumper::addInputTensors(TfLiteInterpreter &interpreter)
{
- for (const auto &o : indices)
+ auto const input_count = TfLiteInterpreterGetInputTensorCount(&interpreter);
+ for (int32_t idx = 0; idx < input_count; idx++)
{
- const TfLiteTensor *tensor = interpreter.tensor(o);
- int size = tensor->bytes;
+ const TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(&interpreter, idx);
+ auto size = TfLiteTensorByteSize(tensor);
std::vector<char> buffer;
buffer.resize(size);
- memcpy(buffer.data(), tensor->data.raw, size);
- _tensors.emplace_back(o, std::move(buffer));
+ memcpy(buffer.data(), TfLiteTensorData(tensor), size);
+ _input_tensors.emplace_back(idx, std::move(buffer));
+ }
+}
+
+// Snapshots every output tensor of `interpreter`: copies each tensor's bytes into an owned
+// buffer and records it in `_output_tensors` under the tensor's output position.
+void TensorDumper::addOutputTensors(TfLiteInterpreter &interpreter)
+{
+  const int32_t num_outputs = TfLiteInterpreterGetOutputTensorCount(&interpreter);
+  for (int32_t out = 0; out < num_outputs; ++out)
+  {
+    const TfLiteTensor *out_tensor = TfLiteInterpreterGetOutputTensor(&interpreter, out);
+    const auto num_bytes = TfLiteTensorByteSize(out_tensor);
+    // Size the buffer up front, then copy the raw tensor payload into it
+    std::vector<char> snapshot(num_bytes);
+    memcpy(snapshot.data(), TfLiteTensorData(out_tensor), num_bytes);
+    _output_tensors.emplace_back(out, std::move(snapshot));
}
}
std::ofstream file(filename, std::ios::out | std::ios::binary);
// Write number of tensors
- uint32_t num_tensors = static_cast<uint32_t>(_tensors.size());
+ uint32_t num_tensors =
+ static_cast<uint32_t>(_input_tensors.size()) + static_cast<uint32_t>(_output_tensors.size());
file.write(reinterpret_cast<const char *>(&num_tensors), sizeof(num_tensors));
- // Write tensor indices
- for (const auto &t : _tensors)
+ // Write input tensor indices
+ for (const auto &t : _input_tensors)
{
file.write(reinterpret_cast<const char *>(&t._index), sizeof(int));
}
- // Write data
- for (const auto &t : _tensors)
+ // Write output tensor indices
+ for (const auto &t : _output_tensors)
+ {
+ file.write(reinterpret_cast<const char *>(&t._index), sizeof(int));
+ }
+
+ // Write input data
+ for (const auto &t : _input_tensors)
+ {
+ file.write(t._data.data(), t._data.size());
+ }
+
+ // Write output data
+ for (const auto &t : _output_tensors)
{
file.write(t._data.data(), t._data.size());
}
#ifndef __TFLITE_RUN_TENSOR_DUMPER_H__
#define __TFLITE_RUN_TENSOR_DUMPER_H__
+#include <tensorflow/lite/c/c_api.h>
+
#include <memory>
#include <string>
#include <vector>
public:
TensorDumper();
- void addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices);
+ void addInputTensors(TfLiteInterpreter &interpreter);
+ void addOutputTensors(TfLiteInterpreter &interpreter);
void dump(const std::string &filename) const;
private:
- std::vector<Tensor> _tensors;
+ std::vector<Tensor> _input_tensors;
+ std::vector<Tensor> _output_tensors;
};
} // end of namespace TFLiteRun
#include <assert.h>
+#include <cstring>
#include <fstream>
#include "misc/tensor/Shape.h"
namespace TFLiteRun
{
-TensorLoader::TensorLoader(tflite::Interpreter &interpreter)
+TensorLoader::TensorLoader(TfLiteInterpreter &interpreter)
: _interpreter(interpreter), _raw_data(nullptr)
{
}
int tensor_indices_raw[num_tensors];
file.read(reinterpret_cast<char *>(tensor_indices_raw), sizeof(tensor_indices_raw));
- std::vector<int> tensor_indices(tensor_indices_raw, tensor_indices_raw + num_tensors);
_raw_data = std::unique_ptr<float[]>(new float[file_size]);
file.read(reinterpret_cast<char *>(_raw_data.get()), file_size);
file.close();
- size_t read_bytes = loadTensorsFromRawData(tensor_indices);
+ size_t read_bytes = loadInputTensorsFromRawData();
+ read_bytes += loadOutputTensorsFromRawData();
// The file size and total output tensor size must match
assert(file_size ==
sizeof(num_tensors) + sizeof(tensor_indices_raw) + read_bytes * sizeof(float));
}
-void TensorLoader::loadRawTensors(const std::string &filename,
- const std::vector<int> &tensor_indices)
+void TensorLoader::loadRawInputTensors(const std::string &filename)
{
// TODO Handle file open/read error
std::ifstream file(filename, std::ios::ate | std::ios::binary);
file.read(reinterpret_cast<char *>(_raw_data.get()), file_size);
file.close();
- size_t read_bytes = loadTensorsFromRawData(tensor_indices);
+ size_t read_bytes = loadInputTensorsFromRawData();
// The file size and total output tensor size must match
assert(file_size == read_bytes * sizeof(float));
}
-size_t TensorLoader::loadTensorsFromRawData(const std::vector<int> &tensor_indices)
+size_t TensorLoader::loadInputTensorsFromRawData()
{
size_t offset = 0;
- for (const auto &o : tensor_indices)
+ auto const input_count = TfLiteInterpreterGetInputTensorCount(&_interpreter);
+ for (auto idx = 0; idx < input_count; idx++)
{
- const TfLiteTensor *tensor = _interpreter.tensor(o);
+ const TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(&_interpreter, idx);
// Convert tensor shape to `Shape` from `tensor->dims`
- nnfw::misc::tensor::Shape shape(static_cast<size_t>(tensor->dims->size));
- for (int d = 0; d < tensor->dims->size; d++)
+ nnfw::misc::tensor::Shape shape(TfLiteTensorNumDims(tensor));
+ for (int32_t d = 0; d < TfLiteTensorNumDims(tensor); d++)
{
- shape.dim(d) = tensor->dims->data[d];
+ shape.dim(d) = TfLiteTensorDim(tensor, d);
}
float *base = _raw_data.get() + offset;
- assert(tensor->bytes % sizeof(float) == 0);
- offset += (tensor->bytes / sizeof(float));
+ assert(TfLiteTensorByteSize(tensor) % sizeof(float) == 0);
+ offset += (TfLiteTensorByteSize(tensor) / sizeof(float));
- _tensor_map.insert(std::make_pair(o, nnfw::tflite::TensorView<float>(shape, base)));
+ _input_tensor_map.emplace(idx, nnfw::tflite::TensorView<float>(shape, base));
+
+ memcpy(TfLiteTensorData(tensor), reinterpret_cast<const void *>(base),
+ TfLiteTensorByteSize(tensor));
+ }
+
+ return offset;
+}
+
+// Builds a TensorView over `_raw_data` for each output tensor of the interpreter and stores
+// it in `_output_tensor_map`, keyed by output position.
+// Returns the number of floats consumed by the output tensors only.
+size_t TensorLoader::loadOutputTensorsFromRawData()
+{
+  // TensorDumper::dump writes all input tensor data first and the output tensor data after
+  // it, so the output payload starts past the inputs. Starting at 0 would alias the inputs.
+  size_t input_elems = 0;
+  auto const input_count = TfLiteInterpreterGetInputTensorCount(&_interpreter);
+  for (int32_t idx = 0; idx < input_count; idx++)
+  {
+    const TfLiteTensor *input = TfLiteInterpreterGetInputTensor(&_interpreter, idx);
+    input_elems += TfLiteTensorByteSize(input) / sizeof(float);
+  }
+
+  // `offset` counts output floats only, because the caller adds it to the input count
+  size_t offset = 0;
+  auto const output_count = TfLiteInterpreterGetOutputTensorCount(&_interpreter);
+  for (int32_t idx = 0; idx < output_count; idx++)
+  {
+    const TfLiteTensor *tensor = TfLiteInterpreterGetOutputTensor(&_interpreter, idx);
+
+    // Convert the tensor's dimensions to a `Shape`
+    nnfw::misc::tensor::Shape shape(TfLiteTensorNumDims(tensor));
+    for (int32_t d = 0; d < TfLiteTensorNumDims(tensor); d++)
+    {
+      shape.dim(d) = TfLiteTensorDim(tensor, d);
+    }
+
+    float *base = _raw_data.get() + input_elems + offset;
+
+    assert(TfLiteTensorByteSize(tensor) % sizeof(float) == 0);
+    offset += (TfLiteTensorByteSize(tensor) / sizeof(float));
+
+    _output_tensor_map.emplace(idx, nnfw::tflite::TensorView<float>(shape, base));
+
+    // NOTE(review): also copies the expected data into the live output tensor, as before;
+    // presumably overwritten by the next invoke - confirm this copy is still wanted.
+    memcpy(TfLiteTensorData(tensor), reinterpret_cast<const void *>(base),
+           TfLiteTensorByteSize(tensor));
}
return offset;
}
-const nnfw::tflite::TensorView<float> &TensorLoader::get(int tensor_idx) const
+const nnfw::tflite::TensorView<float> &TensorLoader::getOutput(int tensor_idx) const
{
- auto found = _tensor_map.find(tensor_idx);
- assert(found != _tensor_map.end());
+ auto found = _output_tensor_map.find(tensor_idx);
+ assert(found != _output_tensor_map.end());
return found->second;
}
#ifndef __TFLITE_RUN_TENSOR_LOADER_H__
#define __TFLITE_RUN_TENSOR_LOADER_H__
+#include "tflite/TensorView.h"
+
#include <sys/mman.h>
+#include <memory>
#include <string>
#include <unordered_map>
-#include "tflite/TensorView.h"
-
namespace tflite
{
class Interpreter;
class TensorLoader
{
public:
- TensorLoader(tflite::Interpreter &interpreter);
+ TensorLoader(TfLiteInterpreter &interpreter);
void loadDumpedTensors(const std::string &filename);
- void loadRawTensors(const std::string &filename, const std::vector<int> &tensor_indices);
- const nnfw::tflite::TensorView<float> &get(int tensor_idx) const;
- size_t getNums() const { return _tensor_map.size(); }
+ void loadRawInputTensors(const std::string &filename);
+ const nnfw::tflite::TensorView<float> &getOutput(int tensor_idx) const;
private:
- size_t loadTensorsFromRawData(const std::vector<int> &tensor_indices);
- tflite::Interpreter &_interpreter;
+ size_t loadInputTensorsFromRawData();
+ size_t loadOutputTensorsFromRawData();
+ TfLiteInterpreter &_interpreter;
std::unique_ptr<float[]> _raw_data;
- std::unordered_map<int, nnfw::tflite::TensorView<float>> _tensor_map;
+ std::unordered_map<int, nnfw::tflite::TensorView<float>> _input_tensor_map;
+ std::unordered_map<int, nnfw::tflite::TensorView<float>> _output_tensor_map;
};
} // end of namespace TFLiteRun
* limitations under the License.
*/
-#include "tensorflow/lite/kernels/register.h"
-#include "tensorflow/lite/model.h"
-
#include "args.h"
#include "tensor_dumper.h"
#include "tensor_loader.h"
#include "tflite/Session.h"
#include "tflite/RandomInputInitializer.h"
#include "tflite/InterpreterSession.h"
-#include "tflite/NNAPISession.h"
#include "misc/tensor/IndexIterator.h"
#include "misc/tensor/Object.h"
#include "benchmark.h"
static const char *default_backend_cand = "tflite_cpu";
-// Verifies whether the model is a flatbuffer file.
-class BMFlatBufferVerifier : public tflite::TfLiteVerifier
-{
-public:
- bool Verify(const char *data, int length, tflite::ErrorReporter *reporter) override
- {
-
- flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(data), length);
- if (!tflite::VerifyModelBuffer(verifier))
- {
- reporter->Report("The model is not a valid Flatbuffer file");
- return false;
- }
- return true;
- }
-};
-
} // namespace
int main(const int argc, char **argv)
{
- const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);
-
- StderrReporter error_reporter;
-
TFLiteRun::Args args(argc, argv);
std::chrono::milliseconds t_model_load(0), t_prepare(0);
benchmark::Phases phases(
benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
- std::unique_ptr<FlatBufferModel> model;
- std::unique_ptr<Interpreter> interpreter;
- std::unique_ptr<tflite::TfLiteVerifier> verifier{new BMFlatBufferVerifier};
+ TfLiteModel *model = nullptr;
try
{
phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
- if (args.getModelValidate())
- {
- model = FlatBufferModel::VerifyAndBuildFromFile(args.getTFLiteFilename().c_str(),
- verifier.get(), &error_reporter);
- }
- else
- {
- model = FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter);
- }
- if (model == nullptr)
- {
- throw std::runtime_error{"Cannot create model"};
- }
-
- tflite::ops::builtin::BuiltinOpResolver resolver;
- InterpreterBuilder builder(*model, resolver);
- TFLITE_ENSURE(builder(&interpreter))
- interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(1));
+ model = TfLiteModelCreateFromFile(args.getTFLiteFilename().c_str());
});
}
catch (const std::exception &e)
return 1;
}
- std::shared_ptr<nnfw::tflite::Session> sess;
-
- if (use_nnapi)
- {
- sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get());
- }
- else
+  // This check runs outside any try block, so throwing here would end in std::terminate.
+  // Report the failure and exit with the same status the MODEL_LOAD catch handler returns.
+  if (model == nullptr)
{
- sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
+    std::cerr << "Cannot create model" << std::endl;
+    return 1;
}
+  // Configure the thread count from the THREAD environment variable (default 1)
+  auto options = TfLiteInterpreterOptionsCreate();
+  TfLiteInterpreterOptionsSetNumThreads(options, nnfw::misc::EnvVar("THREAD").asInt(1));
+
+  TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
+  // TfLiteInterpreterCreate returns null on failure; the interpreter is dereferenced below,
+  // so stop here instead of crashing later.
+  if (interpreter == nullptr)
+  {
+    std::cerr << "Cannot create interpreter" << std::endl;
+    return 1;
+  }
+  auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter);
try
{
phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) { sess->prepare(); });
if (args.getInputShapes().size() != 0)
{
- const int dim_values = args.getInputShapes().size();
- int offset = 0;
+ const auto dim_values = args.getInputShapes().size();
+ int32_t offset = 0;
- for (const auto &id : interpreter->inputs())
+ auto const input_count = TfLiteInterpreterGetInputTensorCount(interpreter);
+ for (int32_t id = 0; id < input_count; id++)
{
- TfLiteTensor *tensor = interpreter->tensor(id);
+ TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(interpreter, id);
std::vector<int32_t> new_dim;
- new_dim.resize(tensor->dims->size);
+ new_dim.resize(TfLiteTensorNumDims(tensor));
- for (uint32_t axis = 0; axis < tensor->dims->size; axis++, offset++)
+ for (int32_t axis = 0; axis < TfLiteTensorNumDims(tensor); axis++, offset++)
{
new_dim[axis] =
- ((offset < dim_values) ? args.getInputShapes()[offset] : tensor->dims->data[axis]);
+ ((offset < dim_values) ? args.getInputShapes()[offset] : TfLiteTensorDim(tensor, axis));
}
- interpreter->ResizeInputTensor(id, new_dim);
+ TfLiteInterpreterResizeInputTensor(interpreter, id, new_dim.data(), new_dim.size());
if (offset >= dim_values)
break;
}
- interpreter->AllocateTensors();
+ TfLiteInterpreterAllocateTensors(interpreter);
}
TFLiteRun::TensorLoader tensor_loader(*interpreter);
{
if (!args.getInputFilename().empty())
{
- tensor_loader.loadRawTensors(args.getInputFilename(), interpreter->inputs());
+ tensor_loader.loadRawInputTensors(args.getInputFilename());
}
else
{
tensor_loader.loadDumpedTensors(args.getCompareFilename());
}
-
- for (const auto &o : interpreter->inputs())
- {
- const auto &tensor_view = tensor_loader.get(o);
- TfLiteTensor *tensor = interpreter->tensor(o);
-
- memcpy(reinterpret_cast<void *>(tensor->data.f),
- reinterpret_cast<const void *>(tensor_view._base), tensor->bytes);
- }
}
else
{
nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
RandomInputInitializer initializer{randgen};
- initializer.run(*(interpreter.get()));
+ initializer.run(*interpreter);
}
TFLiteRun::TensorDumper tensor_dumper;
// Must be called before `interpreter->Invoke()`
- tensor_dumper.addTensors(*interpreter, interpreter->inputs());
+ tensor_dumper.addInputTensors(*interpreter);
std::cout << "input tensor indices = [";
- for (const auto &o : interpreter->inputs())
+ auto const input_count = TfLiteInterpreterGetInputTensorCount(interpreter);
+ for (int32_t idx = 0; idx < input_count; idx++)
{
- std::cout << o << ",";
+ std::cout << idx << ",";
}
std::cout << "]" << std::endl;
sess->teardown();
// Must be called after `interpreter->Invoke()`
- tensor_dumper.addTensors(*interpreter, interpreter->outputs());
+ tensor_dumper.addOutputTensors(*interpreter);
std::cout << "output tensor indices = [";
- for (const auto &o : interpreter->outputs())
+ auto const output_count = TfLiteInterpreterGetOutputTensorCount(interpreter);
+ for (int32_t idx = 0; idx < output_count; idx++)
{
- std::cout << o << "(";
-
- print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float));
+ auto tensor = TfLiteInterpreterGetOutputTensor(interpreter, idx);
+ print_max_idx(reinterpret_cast<float *>(TfLiteTensorData(tensor)),
+ TfLiteTensorByteSize(tensor) / sizeof(float));
std::cout << "),";
}
TfLiteInterpMatchApp app(comparator);
bool res = true;
- for (const auto &o : interpreter->outputs())
+ for (int32_t idx = 0; idx < output_count; idx++)
{
- auto expected = tensor_loader.get(o);
- auto obtained = nnfw::tflite::TensorView<float>::make(*interpreter, o);
+ auto expected = tensor_loader.getOutput(idx);
+ auto const tensor = TfLiteInterpreterGetOutputTensor(interpreter, idx);
+ auto obtained = nnfw::tflite::TensorView<float>::make(tensor);
- res = res && app.compareSingleTensorView(expected, obtained, o);
+ res = res && app.compareSingleTensorView(expected, obtained, idx);
}
if (!res)
+++ /dev/null
-if(NOT BUILD_TFLITE_VANILLA_RUN)
- return()
-endif()
-
-if(NOT BUILD_TENSORFLOW_LITE_2_8_0)
- set(BUILD_TENSORFLOW_LITE_2_8_0 ON)
- set(BUILD_TENSORFLOWRUY ON)
-endif()
-
-nnfw_find_package(TensorFlowLite EXACT 2.8.0 REQUIRED)
-nnfw_find_package(Boost REQUIRED program_options)
-
-list(APPEND TFLITE_RUN_SRCS "src/tflite_vanilla_run.cc")
-list(APPEND TFLITE_RUN_SRCS "src/args.cc")
-
-add_executable(tflite_vanilla_run ${TFLITE_RUN_SRCS})
-target_include_directories(tflite_vanilla_run PRIVATE src)
-target_include_directories(tflite_vanilla_run PRIVATE ${Boost_INCLUDE_DIRS})
-
-target_link_libraries(tflite_vanilla_run tensorflow-lite-2.8.0 ${LIB_PTHREAD} dl)
-target_link_libraries(tflite_vanilla_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
-target_link_libraries(tflite_vanilla_run nnfw_lib_benchmark nnfw_lib_misc)
-
-install(TARGETS tflite_vanilla_run DESTINATION bin)
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "args.h"
-
-#include <iostream>
-
-namespace TFLiteVanillaRun
-{
-
-Args::Args(const int argc, char **argv) noexcept
-{
- try
- {
- Initialize();
- Parse(argc, argv);
- }
- catch (const std::exception &e)
- {
- std::cerr << "error during paring args" << e.what() << '\n';
- exit(1);
- }
-}
-
-void Args::Initialize(void)
-{
- try
- {
- // General options
- po::options_description general("General options");
-
- // clang-format off
- general.add_options()
- ("help,h", "Display available options")
- ("input,i", po::value<std::string>()->default_value(""), "Input filename")
- ("dump,d", po::value<std::string>()->default_value(""), "Output filename")
- ("ishapes", po::value<std::vector<int>>()->multitoken(), "Input shapes")
- ("compare,c", po::value<std::string>()->default_value(""), "filename to be compared with")
- ("tflite", po::value<std::string>()->required())
- ("num_runs,r", po::value<int>()->default_value(1), "The number of runs")
- ("warmup_runs,w", po::value<int>()->default_value(0), "The number of warmup runs")
- ("run_delay,t", po::value<int>()->default_value(-1), "Delay time(ms) between runs (as default no delay")
- ("gpumem_poll,g", po::value<bool>()->default_value(false), "Check gpu memory polling separately")
- ("mem_poll,m", po::value<bool>()->default_value(false), "Check memory polling")
- ("write_report,p", po::value<bool>()->default_value(false), "Write report")
- ("validate", po::value<bool>()->default_value(true), "Validate tflite model")
- ("verbose_level,v", po::value<int>()->default_value(0), "Verbose level\n"
- "0: prints the only result. Messages btw run don't print\n"
- "1: prints result and message btw run\n"
- "2: prints all of messages to print\n")
- ;
- // clang-format on
-
- _options.add(general);
- _positional.add("tflite", 1);
- }
- catch (const std::bad_cast &e)
- {
- std::cerr << "error by bad cast during initialization of boost::program_options" << e.what()
- << '\n';
- exit(1);
- }
-}
-
-void Args::Parse(const int argc, char **argv)
-{
- po::variables_map vm;
- po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
- vm);
-
- {
- auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
- if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
- {
- throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
- "' cannot be given at once.");
- }
- };
-
- conflicting_options("input", "compare");
- }
-
- if (vm.count("help"))
- {
- std::cout << "tflite_run\n\n";
- std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n";
- std::cout << _options;
- std::cout << "\n";
-
- exit(0);
- }
-
- po::notify(vm);
-
- if (vm.count("dump"))
- {
- _dump_filename = vm["dump"].as<std::string>();
- }
-
- if (vm.count("compare"))
- {
- _compare_filename = vm["compare"].as<std::string>();
- }
-
- if (vm.count("input"))
- {
- _input_filename = vm["input"].as<std::string>();
-
- if (!_input_filename.empty())
- {
- if (access(_input_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "input image file not found: " << _input_filename << "\n";
- }
- }
- }
-
- if (vm.count("ishapes"))
- {
- _input_shapes.resize(vm["ishapes"].as<std::vector<int>>().size());
- for (auto i = 0; i < _input_shapes.size(); i++)
- {
- _input_shapes[i] = vm["ishapes"].as<std::vector<int>>()[i];
- }
- }
-
- if (vm.count("tflite"))
- {
- _tflite_filename = vm["tflite"].as<std::string>();
-
- if (_tflite_filename.empty())
- {
- // TODO Print usage instead of the below message
- std::cerr << "Please specify tflite file. Run with `--help` for usage."
- << "\n";
-
- exit(1);
- }
- else
- {
- if (access(_tflite_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "tflite file not found: " << _tflite_filename << "\n";
- exit(1);
- }
- }
- }
-
- if (vm.count("num_runs"))
- {
- _num_runs = vm["num_runs"].as<int>();
- }
-
- if (vm.count("warmup_runs"))
- {
- _warmup_runs = vm["warmup_runs"].as<int>();
- }
-
- if (vm.count("run_delay"))
- {
- _run_delay = vm["run_delay"].as<int>();
- }
-
- if (vm.count("gpumem_poll"))
- {
- _gpumem_poll = vm["gpumem_poll"].as<bool>();
- }
-
- if (vm.count("mem_poll"))
- {
- _mem_poll = vm["mem_poll"].as<bool>();
- // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP
- if (_mem_poll && _warmup_runs == 0)
- {
- _warmup_runs = 1;
- }
- }
-
- if (vm.count("write_report"))
- {
- _write_report = vm["write_report"].as<bool>();
- }
-
- if (vm.count("validate"))
- {
- _tflite_validate = vm["validate"].as<bool>();
- }
-
- if (vm.count("verbose_level"))
- {
- _verbose_level = vm["verbose_level"].as<int>();
- }
-}
-
-} // end of namespace TFLiteVanillaRun
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TFLITE_VANILLA_RUN_ARGS_H__
-#define __TFLITE_VANILLA_RUN_ARGS_H__
-
-#include <string>
-#include <boost/program_options.hpp>
-
-namespace po = boost::program_options;
-
-namespace TFLiteVanillaRun
-{
-
-class Args
-{
-public:
- Args(const int argc, char **argv) noexcept;
- void print(void);
-
- const std::string &getTFLiteFilename(void) const { return _tflite_filename; }
- const std::string &getDumpFilename(void) const { return _dump_filename; }
- const std::string &getCompareFilename(void) const { return _compare_filename; }
- const std::string &getInputFilename(void) const { return _input_filename; }
- const std::vector<int> &getInputShapes(void) const { return _input_shapes; }
- const int getNumRuns(void) const { return _num_runs; }
- const int getWarmupRuns(void) const { return _warmup_runs; }
- const int getRunDelay(void) const { return _run_delay; }
- const bool getGpuMemoryPoll(void) const { return _gpumem_poll; }
- const bool getMemoryPoll(void) const { return _mem_poll; }
- const bool getWriteReport(void) const { return _write_report; }
- const bool getModelValidate(void) const { return _tflite_validate; }
- const int getVerboseLevel(void) const { return _verbose_level; }
-
-private:
- void Initialize();
- void Parse(const int argc, char **argv);
-
-private:
- po::positional_options_description _positional;
- po::options_description _options;
-
- std::string _tflite_filename;
- std::string _dump_filename;
- std::string _compare_filename;
- std::string _input_filename;
- std::vector<int> _input_shapes;
- int _num_runs;
- int _warmup_runs;
- int _run_delay;
- bool _gpumem_poll;
- bool _mem_poll;
- bool _write_report;
- bool _tflite_validate;
- int _verbose_level;
-};
-
-} // end of namespace TFLiteVanillaRun
-
-#endif // __TFLITE_VANILLA_RUN_ARGS_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorView.h
- * @brief This file contains TensorView class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
-#define __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
-
-#include "tensorflow/lite/interpreter.h"
-
-#include "misc/tensor/Shape.h"
-#include "misc/tensor/Index.h"
-#include "misc/tensor/Reader.h"
-#include "misc/tensor/NonIncreasingStride.h"
-
-namespace TFLiteVanillaRun
-{
-
-/**
- * @brief Class to define TensorView which is inherited from nnfw::misc::tensor::Reader<T> class
- */
-template <typename T> class TensorView final : public nnfw::misc::tensor::Reader<T>
-{
-public:
- /**
- * @brief Construct a TensorView object with base and shape informations
- * @param[in] shape The shape of a tensor
- * @param[in] base The base address of a tensor
- */
- TensorView(const nnfw::misc::tensor::Shape &shape, T *base) : _shape{shape}, _base{base}
- {
- // Set 'stride'
- _stride.init(_shape);
- }
-
-public:
- /**
- * @brief Get shape of tensor
- * @return Reference of shape
- */
- const nnfw::misc::tensor::Shape &shape(void) const { return _shape; }
-
-public:
- /**
- * @brief Get value of tensor index
- * @param[in] index The tensor index
- * @return The value at the index
- */
- T at(const nnfw::misc::tensor::Index &index) const override
- {
- const auto offset = _stride.offset(index);
- return *(_base + offset);
- }
-
-public:
- /**
- * @brief Get reference value of tensor index
- * @param[in] index The tensor index
- * @return The reference value at the index
- */
- T &at(const nnfw::misc::tensor::Index &index)
- {
- const auto offset = _stride.offset(index);
- return *(_base + offset);
- }
-
-private:
- nnfw::misc::tensor::Shape _shape; /**< The tensor shape */
-
-public:
- T *_base; /**< The base address of tensor */
- nnfw::misc::tensor::NonIncreasingStride _stride; /**< The NonIncreasingStride object */
-
-public:
- // TODO Introduce Operand ID class
- /**
- * @brief Create TensorView object using given parameters
- * @param[in] interp The TfLite interpreter
- * @param[in] tensor_index The tensor index
- * @return The new TensorView<T> object
- */
- static TensorView<T> make(::tflite::Interpreter &interp, int tensor_index)
- {
- auto tensor_ptr = interp.tensor(tensor_index);
-
- // Set 'shape'
- nnfw::misc::tensor::Shape shape(tensor_ptr->dims->size);
-
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- shape.dim(axis) = tensor_ptr->dims->data[axis];
- }
-
- return TensorView<T>(shape, interp.typed_tensor<T>(tensor_index));
- }
-};
-
-} // namespace TFLiteVanillaRun
-
-#endif // __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/kernels/register.h"
-#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
-
-#include "args.h"
-#include "tensor_view.h"
-#include "misc/EnvVar.h"
-#include "misc/RandomGenerator.h"
-#include "misc/tensor/IndexIterator.h"
-#include "misc/tensor/Object.h"
-#include "benchmark.h"
-
-#include <iostream>
-#include <chrono>
-#include <algorithm>
-#include <vector>
-#include <memory>
-
-using namespace std::placeholders; // for _1, _2 ...
-
-#define TFLITE_ENSURE(exp) \
- { \
- const TfLiteStatus status = (exp); \
- \
- if (status != kTfLiteOk) \
- { \
- std::ostringstream ss; \
- ss << #exp << " failed (" << __FILE__ << ":" << __LINE__ << ")"; \
- throw std::runtime_error{ss.str()}; \
- } \
- }
-
-namespace
-{
-
-void print_max_idx(float *f, int size)
-{
- float *p = std::max_element(f, f + size);
- std::cout << "max:" << p - f;
-}
-
-static const char *default_backend_cand = "tflite_cpu";
-
-// Verifies whether the model is a flatbuffer file.
-class BMFlatBufferVerifier : public tflite::TfLiteVerifier
-{
-public:
- bool Verify(const char *data, int length, tflite::ErrorReporter *reporter) override
- {
-
- flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(data), length);
- if (!tflite::VerifyModelBuffer(verifier))
- {
- reporter->Report("The model is not a valid Flatbuffer file");
- return false;
- }
- return true;
- }
-};
-
-} // namespace
-
-int main(const int argc, char **argv)
-{
- tflite::StderrReporter error_reporter;
-
- TFLiteVanillaRun::Args args(argc, argv);
-
- std::chrono::milliseconds t_model_load(0), t_prepare(0);
-
- // TODO Apply verbose level to phases
- const int verbose = args.getVerboseLevel();
- benchmark::Phases phases(
- benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()});
-
- std::unique_ptr<tflite::FlatBufferModel> model;
- std::unique_ptr<tflite::Interpreter> interpreter;
- std::unique_ptr<tflite::TfLiteVerifier> verifier{new BMFlatBufferVerifier};
-
- try
- {
- phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) {
- if (args.getModelValidate())
- {
- model = tflite::FlatBufferModel::VerifyAndBuildFromFile(args.getTFLiteFilename().c_str(),
- verifier.get(), &error_reporter);
- }
- else
- {
- model =
- tflite::FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter);
- }
- if (model == nullptr)
- {
- throw std::runtime_error{"Cannot create model"};
- }
-
- // Use tflite's resolver, not onert's one
- tflite::ops::builtin::BuiltinOpResolver resolver;
- tflite::InterpreterBuilder builder(*model, resolver);
- TFLITE_ENSURE(builder(&interpreter))
- interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1));
- });
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << '\n';
- return 1;
- }
-
- const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false);
-
- try
- {
- phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
- if (use_nnapi)
- {
- // TFLite NNAPI is not worked yet
- interpreter->ModifyGraphWithDelegate(tflite::NnApiDelegate());
- }
- interpreter->AllocateTensors();
- });
- }
- catch (const std::exception &e)
- {
- std::cerr << e.what() << '\n';
- return 1;
- }
-
- const int seed = 1; /* TODO Add an option for seed value */
- nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
-
- // No input specified. So we fill the input tensors with random values.
- for (const auto &o : interpreter->inputs())
- {
- TfLiteTensor *tensor = interpreter->tensor(o);
- if (tensor->type == kTfLiteInt32)
- {
- // Generate singed 32-bit integer (s32) input
- auto tensor_view = TFLiteVanillaRun::TensorView<int32_t>::make(*interpreter, o);
-
- int32_t value = 0;
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- // Gather operation: index should be within input coverage.
- tensor_view.at(ind) = value;
- value++;
- };
- }
- else if (tensor->type == kTfLiteUInt8)
- {
- // Generate unsigned 8-bit integer input
- auto tensor_view = TFLiteVanillaRun::TensorView<uint8_t>::make(*interpreter, o);
-
- uint8_t value = 0;
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- // TODO Generate random values
- tensor_view.at(ind) = value;
- value = (value + 1) & 0xFF;
- };
- }
- else if (tensor->type == kTfLiteBool)
- {
- // Generate bool input
- auto tensor_view = TFLiteVanillaRun::TensorView<bool>::make(*interpreter, o);
-
- auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
- const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
- &nnfw::misc::RandomGenerator::generate<bool>);
- const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
- std::bind(fp, randgen, _1, _2));
-
- nnfw::misc::tensor::iterate(tensor_view.shape())
- << [&](const nnfw::misc::tensor::Index &ind) {
- const auto value = data.at(ind);
- tensor_view.at(ind) = value;
- };
- }
- else
- {
- assert(tensor->type == kTfLiteFloat32);
-
- const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
- for (float *ptr = tensor->data.f; ptr < end; ptr++)
- {
- *ptr = randgen.generate<float>();
- }
- }
- }
-
- std::cout << "input tensor indices = [";
- for (const auto &o : interpreter->inputs())
- {
- std::cout << o << ",";
- }
- std::cout << "]" << std::endl;
-
- // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the
- // only warmup.
- if (verbose == 0)
- {
- phases.run(
- "WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- args.getWarmupRuns());
- phases.run(
- "EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- args.getNumRuns(), true);
- }
- else
- {
- phases.run(
- "WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms"
- << std::endl;
- },
- args.getWarmupRuns());
- phases.run(
- "EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); },
- [&](const benchmark::Phase &phase, uint32_t nth) {
- std::cout << "... "
- << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" << std::endl;
- },
- args.getNumRuns(), true);
- }
-
- std::cout << "output tensor indices = [";
- for (const auto &o : interpreter->outputs())
- {
- std::cout << o << "(";
-
- print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float));
-
- std::cout << "),";
- }
- std::cout << "]" << std::endl;
-
- // TODO Apply verbose level to result
-
- // prepare result
- benchmark::Result result(phases);
-
- // to stdout
- benchmark::printResult(result);
-
- if (args.getWriteReport())
- {
- // prepare csv task
- std::string exec_basename;
- std::string model_basename;
- std::string backend_name = default_backend_cand;
- {
- std::vector<char> vpath(args.getTFLiteFilename().begin(), args.getTFLiteFilename().end() + 1);
- model_basename = basename(vpath.data());
- size_t lastindex = model_basename.find_last_of(".");
- model_basename = model_basename.substr(0, lastindex);
- exec_basename = basename(argv[0]);
- }
- benchmark::writeResult(result, exec_basename, model_basename, backend_name);
- }
-
- return 0;
-}
It takes `modelfile` as input and generates `nnpackage`.
+## prerequisite
+
+Python 3.6 or greater (the script uses f-strings)
+
## Usage
```
-Usage: model2nnpkg.sh [options] modelfile
-Convert modelfile to nnpackage.
+usage: model2nnpkg.py [options]
+ Examples:
+ model2nnpkg.py -m add.tflite => create nnpkg "add" in current directory
+ model2nnpkg.py -o out -m add.tflite => create nnpkg "add" in out/
+ model2nnpkg.py -o out -p addpkg -m add.tflite => create nnpkg "addpkg" in out/
+ model2nnpkg.py -c add.cfg -m add.tflite => create nnpkg "add" with add.cfg
+ model2nnpkg.py -o out -p addpkg -m a1.tflite a2.tflite -i a1.json a2.json
+ => create nnpkg "addpkg" with models a1.tflite and a2.tflite in out/
+
+
+Convert model files (tflite, circle or tvn) to nnpkg.
+
+options:
+ -h, --help show this help message and exit
+ -o output_directory, --outdir output_directory
+ set nnpkg output directory
+ -p nnpkg_name, --nnpkg-name nnpkg_name
+ set nnpkg output name (default=[1st modelfile name])
+ -c conf [conf ...], --config conf [conf ...]
+ provide configuration files
+ -m model [model ...], --models model [model ...]
+ provide model files
+ -i io_info [io_info ...], --io-info io_info [io_info ...]
+ provide io info
+```
+
+## Usage (To be deprecated)
+```
+Usage: model2nnpkg.sh [options]
+Convert modelfile (tflite, circle or tvn) to nnpackage.
Options:
-h show this help
-o set nnpackage output directory (default=.)
- -p set nnpackage output name (default=[modelfile name])
+ -p set nnpackage output name (default=[1st modelfile name])
+ -c provide configuration files
+ -m provide model files
Examples:
- model2nnpkg.sh add.tflite => create nnpackage 'add' in ./
- model2nnpkg.sh -o out add.tflite => create nnpackage 'add' in out/
- model2nnpkg.sh -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/
+ model2nnpkg.sh -m add.tflite => create nnpackage 'add' in ./
+ model2nnpkg.sh -o out -m add.tflite => create nnpackage 'add' in out/
+ model2nnpkg.sh -o out -p addpkg -m add.tflite => create nnpackage 'addpkg' in out/
+ model2nnpkg.sh -c add.cfg -m add.tflite => create nnpackage 'add' with add.cfg
+    model2nnpkg.sh -o out -p addpkg -m a1.tflite a2.tflite => create nnpackage 'addpkg' with models a1.tflite and a2.tflite in out/
```
--- /dev/null
+#!/usr/bin/env python3
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import os
+import shutil
+import sys
+
+
+def _is_json(myjson):
+    """Tell whether an open text stream holds a parseable JSON document.
+
+    The stream is consumed by the attempt; a caller that needs the content
+    afterwards has to rewind or reopen it.
+    """
+    try:
+        json.load(myjson)
+    except ValueError:
+        parsed = False
+    else:
+        parsed = True
+    return parsed
+
+
+def _verify_args(args):
+    """Validate the parsed command line before anything is written to disk.
+
+    Checks, in order: at least one model is given; config files (when given)
+    are one per model; every model file exists and its name has an extension;
+    every config file exists; every io-info file parses as JSON; all io-info
+    files agree on the number of original-model inputs and outputs.
+
+    Args:
+        args: namespace with `models`, `config` and `io_info` attributes.
+
+    Raises:
+        Exception: carrying a user-facing message for the first failed check.
+    """
+    progname = os.path.basename(sys.argv[0])
+
+    if not args.models:
+        raise Exception('error: no model file is provided.\n' +
+                        'Please provide at least one model file with -m/--models.')
+
+    if args.config and len(args.config) != len(args.models):
+        raise Exception(
+            'error: when config file is provided, # of config file should be same with modelfile\n'
+            +
+            "Please provide config file for each model file, or don't provide config file."
+        )
+
+    for i, model_path in enumerate(args.models):
+        if not os.path.isfile(model_path):
+            raise Exception(f'error: {model_path} does not exist.')
+
+        modelfile = os.path.basename(model_path)
+        if len(modelfile.split('.')) == 1:
+            raise Exception(
+                'error: modelfile does not have extension.\n' +
+                f"Please provide extension so that {progname} can identify what type of model you use."
+            )
+
+        if args.config:
+            # Test the path exactly as given. Testing only its basename would
+            # look in the current directory instead of where the file lives.
+            config_path = args.config[i]
+            if not os.path.isfile(config_path):
+                raise Exception(f'error: {config_path} does not exist.')
+
+    # First pass: parse every io-info file once, rejecting non-JSON content
+    # before any size comparison is attempted.
+    io_infos = []
+    for io_info_path in (args.io_info or []):
+        with open(io_info_path, "r") as io_json:
+            try:
+                io_infos.append((io_info_path, json.load(io_json)))
+            except ValueError:
+                raise Exception(
+                    f'error: io info file {io_info_path} is not json file.\n' +
+                    f"Please provide json file so that {progname} can identify what inputs/outputs of model you use."
+                ) from None
+
+    # Second pass: every file must describe the same number of original-model
+    # inputs and outputs as the first one.
+    size_inputs = 0
+    size_outputs = 0
+    for model_index, (io_info_path, model_io) in enumerate(io_infos):
+        if model_index == 0:
+            size_inputs = len(model_io["org-model-io"]["inputs"]["new-indices"])
+            size_outputs = len(model_io["org-model-io"]["outputs"]["new-indices"])
+            continue
+
+        if size_inputs != len(model_io["org-model-io"]["inputs"]["new-indices"]):
+            raise Exception(
+                'error: Invalid size of input indices\n' +
+                f"The size of original model's inputs in io info file {io_info_path} is different from the previous files."
+            )
+        if size_outputs != len(model_io["org-model-io"]["outputs"]["new-indices"]):
+            raise Exception(
+                'error: Invalid size of output indices.\n' +
+                f"The size of original model's outputs in io info file {io_info_path} is different from the previous files."
+            )
+
+
+def _get_args():
+    """Parse and validate the command line of model2nnpkg.
+
+    Returns:
+        The argparse namespace, extended with:
+          * `nnpkg_name` defaulting to the first model's file name without
+            its last extension;
+          * `prog`, the program name argparse derived for this run.
+
+    Raises:
+        Exception: propagated from `_verify_args` when a check fails.
+    """
+    parser = argparse.ArgumentParser(
+        description='Convert model files (tflite, circle or tvn) to nnpkg.',
+        usage='''  %(prog)s [options]
+  Examples:
+      %(prog)s -m add.tflite                           => create nnpkg "add" in current directory
+      %(prog)s -o out -m add.tflite                    => create nnpkg "add" in out/
+      %(prog)s -o out -p addpkg -m add.tflite          => create nnpkg "addpkg" in out/
+      %(prog)s -c add.cfg -m add.tflite                => create nnpkg "add" with add.cfg
+      %(prog)s -o out -p addpkg -m a1.tflite a2.tflite -i a1.json a2.json
+        => create nnpkg "addpkg" with models a1.tflite and a2.tflite in out/
+  ''')
+    parser.add_argument(
+        '-o',
+        '--outdir',
+        type=str,
+        default=os.getcwd(),
+        metavar='output_directory',
+        help='set nnpkg output directory')
+    parser.add_argument(
+        '-p',
+        '--nnpkg-name',
+        type=str,
+        metavar='nnpkg_name',
+        help='set nnpkg output name (default=[1st modelfile name])')
+    parser.add_argument(
+        '-c',
+        '--config',
+        type=str,
+        nargs='+',
+        default='',
+        metavar='conf',
+        help='provide configuration files')
+    # At least one model is mandatory: everything downstream indexes
+    # args.models, so let argparse report the omission instead of failing
+    # later with a TypeError on None.
+    parser.add_argument(
+        '-m',
+        '--models',
+        type=str,
+        nargs='+',
+        required=True,
+        metavar='model',
+        help='provide model files')
+    parser.add_argument(
+        '-i', '--io-info', type=str, nargs='+', metavar='io_info', help='provide io info')
+
+    args = parser.parse_args()
+
+    _verify_args(args)
+
+    if not args.nnpkg_name:
+        # Default package name: first model's file name minus its last extension.
+        first_model_name = os.path.basename(args.models[0]).rsplit('.', 1)[0]
+        args.nnpkg_name = first_model_name
+
+    args.prog = parser.prog
+
+    return args
+
+
+def _get_org_model_input_size(json_path):
+    """Count the original model's inputs recorded in an io-info JSON file.
+
+    The count is the length of the "new-indices" list found under
+    "org-model-io" -> "inputs".
+    """
+    with open(json_path, "r") as stream:
+        org_inputs = json.load(stream)["org-model-io"]["inputs"]
+    return len(org_inputs["new-indices"])
+
+
+def _get_org_model_output_size(json_path):
+    """Count the original model's outputs recorded in an io-info JSON file.
+
+    The count is the length of the "new-indices" list found under
+    "org-model-io" -> "outputs".
+    """
+    with open(json_path, "r") as stream:
+        org_outputs = json.load(stream)["org-model-io"]["outputs"]
+    return len(org_outputs["new-indices"])
+
+
+def _resolve_pkg_io(pkg_io, org_indices, new_indices, model_pos, kind, io_info_path):
+    """Fill the slots of `pkg_io` that model `model_pos` provides.
+
+    For every entry of `org_indices` other than -1, the slot at the same
+    position becomes '<model_pos>:0:<p>', where <p> is the position of that
+    index inside `new_indices`.
+
+    Raises:
+        Exception: when an index listed in org-model-io is absent from
+            new-model-io.
+    """
+    for org_pos, wanted_index in enumerate(org_indices):
+        if wanted_index == -1:
+            # -1 entries are skipped (presumably: not handled by this model).
+            continue
+        try:
+            new_pos = new_indices.index(wanted_index)
+        except ValueError:
+            raise Exception(
+                'error: Wrong io information\n' +
+                f'The {kind} index {wanted_index} exists in org-model-io, but not in new-model-io\n'
+                + f'Please check {io_info_path}') from None
+        pkg_io[org_pos] = f'{model_pos}:0:{new_pos}'
+
+
+def _generate_io_conn_info(io_info_files):
+    """Build the io/connection entries of the manifest from io-info files.
+
+    Args:
+        io_info_files: list of io-info JSON paths, one per model, or None.
+
+    Returns:
+        {} when no io-info file is given; otherwise a dict with
+          * "pkg-inputs" / "pkg-outputs": one slot per original-model
+            input/output, holding '<model>:0:<position>' where a model
+            provides it and the slot's own position number otherwise;
+          * "model-connect": list of {"from": edge, "to": [edges]} for every
+            model output whose "org-indices" value equals another model's
+            input "org-indices" value.
+
+    Raises:
+        Exception: when an index listed in a file's org-model-io is missing
+            from the same file's new-model-io.
+    """
+    ret = {}
+
+    if not io_info_files:
+        return ret
+
+    # Parse each file exactly once.
+    model_ios = []
+    for io_info_path in io_info_files:
+        with open(io_info_path, "r") as io_json:
+            model_ios.append(json.load(io_json))
+
+    org_model_io = [model_io["org-model-io"] for model_io in model_ios]
+    new_model_io = {
+        "inputs": [model_io["new-model-io"]["inputs"] for model_io in model_ios],
+        "outputs": [model_io["new-model-io"]["outputs"] for model_io in model_ios],
+    }
+
+    # The first file fixes the number of package inputs/outputs; slots start
+    # out as their own position number.
+    pkg_inputs = list(range(len(org_model_io[0]["inputs"]["new-indices"])))
+    pkg_outputs = list(range(len(org_model_io[0]["outputs"]["new-indices"])))
+
+    for model_pos, io_info_path in enumerate(io_info_files):
+        _resolve_pkg_io(pkg_inputs, org_model_io[model_pos]["inputs"]["new-indices"],
+                        new_model_io["inputs"][model_pos]["new-indices"], model_pos,
+                        'input', io_info_path)
+        _resolve_pkg_io(pkg_outputs, org_model_io[model_pos]["outputs"]["new-indices"],
+                        new_model_io["outputs"][model_pos]["new-indices"], model_pos,
+                        'output', io_info_path)
+
+    ret["pkg-inputs"] = pkg_inputs
+    ret["pkg-outputs"] = pkg_outputs
+
+    # An output of one model feeds an input of another model when both refer
+    # to the same original tensor index.
+    model_connect = {}
+    for input_model_pos, inputs in enumerate(new_model_io["inputs"]):
+        for output_model_pos, outputs in enumerate(new_model_io["outputs"]):
+            if input_model_pos == output_model_pos:
+                continue
+
+            for input_index_pos, org_input_index in enumerate(inputs["org-indices"]):
+                for output_index_pos, org_output_index in enumerate(
+                        outputs["org-indices"]):
+                    if org_input_index == org_output_index:
+                        edge_to = f'{input_model_pos}:0:{input_index_pos}'
+                        edge_from = f'{output_model_pos}:0:{output_index_pos}'
+                        model_connect.setdefault(edge_from, []).append(edge_to)
+
+    ret["model-connect"] = [{
+        "from": edge_from,
+        "to": edge_to
+    } for edge_from, edge_to in model_connect.items()]
+
+    return ret
+
+
+def _generate_manifest(args):
+    """Assemble the MANIFEST dictionary for the nnpkg described by `args`.
+
+    Only file names (not directories) of the models and configs are recorded;
+    each model's type is the text after the last '.' of its file name. The
+    io/connection entries derived from `args.io_info` are merged in last.
+    """
+    if args.config:
+        config_names = [os.path.basename(path) for path in args.config]
+    else:
+        config_names = [""]
+    model_names = [os.path.basename(path) for path in args.models]
+    model_types = [os.path.basename(path).rsplit('.', 1)[1] for path in args.models]
+    connection_info = _generate_io_conn_info(args.io_info)
+
+    manifest = {
+        "major-version": "1",
+        "minor-version": "2",
+        "patch-version": "0",
+        "configs": config_names,
+        "models": model_names,
+        "model-types": model_types,
+    }
+    manifest.update(connection_info)
+
+    return manifest
+
+
+def main():
+    """Create the nnpkg directory, write its MANIFEST and copy the input files.
+
+    Layout produced under <outdir>/<nnpkg_name>/:
+      * the model files;
+      * metadata/MANIFEST (JSON, 2-space indent);
+      * metadata/<config files>, when configs are given.
+
+    Any failure is reported on stderr and the process exits with status 1.
+    """
+    try:
+        # parse arguments
+        args = _get_args()
+
+        print(f'{args.prog}: Generating nnpkg {args.nnpkg_name} in {args.outdir}')
+        # mkdir nnpkg directory
+        nnpkg_path = os.path.join(args.outdir, args.nnpkg_name)
+        metadata_path = os.path.join(nnpkg_path, 'metadata')
+        os.makedirs(metadata_path, exist_ok=True)
+
+        # dump manifest file
+        manifest = _generate_manifest(args)
+        manifest_path = os.path.join(metadata_path, 'MANIFEST')
+        with open(manifest_path, "w") as json_file:
+            json_file.write(f'{json.dumps(manifest, indent=2)}\n')
+
+        # copy models and configurations (configs are one per model)
+        for i, model_path in enumerate(args.models):
+            shutil.copy2(model_path, nnpkg_path)
+            if args.config:
+                shutil.copy2(args.config[i], metadata_path)
+    except Exception as e:
+        # Diagnostics go to stderr so stdout carries only the progress line.
+        print(e, file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()
p) name=$OPTARG;;
c)
configs_src=($OPTARG)
- until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z $(eval "echo \${$OPTIND}") ]; do
+ until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z "$(eval "echo \${$OPTIND}")" ]; do
if [[ $OPTIND -eq $# ]] && [[ ${#models_src[@]} -eq 0 ]]; then
# Backward compatibility (will be deprecated)
# The last remain parameter is model if there is no option "-m"
;;
m)
models_src=($OPTARG)
- until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z $(eval "echo \${$OPTIND}") ]; do
+ until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z "$(eval "echo \${$OPTIND}")" ]; do
models_src+=($(eval "echo \${$OPTIND}"))
OPTIND=$((OPTIND + 1))
done
fi
delim=""
-for modelpath in ${models_src[@]}
+for modelpath in "${models_src[@]}"
do
modelfile=$(basename "$modelpath")
exit 1
fi
- if [ ! -e $modelpath ]; then
- >&2 echo "error: "$modelpath" does not exist."
+ if [ ! -e "$modelpath" ]; then
+ >&2 echo "error: $modelpath does not exist."
exit 1
fi
done
delim=""
-for configpath in ${configs_src[@]}
+for configpath in "${configs_src[@]}"
do
configfile=$(basename "$configpath")
- if [ ! -e $configpath ]; then
- >&2 echo "error: "$configpath" does not exist."
+ if [ ! -e "$configpath" ]; then
+ >&2 echo "error: $configpath does not exist."
exit 1
fi
name=${first_modelfile%.*}
fi
-echo "$progname: Generating nnpackage "$name" in "$outdir""
+echo "$progname: Generating nnpackage $name in $outdir"
mkdir -p "$outdir"/"$name"/metadata
cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
}
EOF
-for modelpath in ${models_src[@]}
+for modelpath in "${models_src[@]}"
do
cp "$modelpath" "$outdir"/"$name"
done
-for configpath in ${configs_src[@]}
+for configpath in "${configs_src[@]}"
do
cp "$configpath" "$outdir/$name/metadata"
done
# nncc-tc-to-nnpkg-tc
-`model2nnpkg` is a tool to convert model (either `tflite` or `circle`) to `nnpackage`.
+`nncc-tc-to-nnpkg-tc` is a tool to convert nncc testcase to nnpackage testcase.
-It takes `modelfile` as input and generates `nnpackage`.
+It takes `nncc-tc` as input and generates `nnpkg-tc`.
+
+## prerequisite
+
+Python 3.6 or greater (for internally using model2nnpkg, which relies on f-strings)
## Usage
progname=$(basename "${BASH_SOURCE[0]}")
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-model2nnpkg=${model2nnpkg:-"$script_dir"/../model2nnpkg/model2nnpkg.sh}
+model2nnpkg=${model2nnpkg:-"$script_dir"/../model2nnpkg/model2nnpkg.py}
# Need to install nncc package & set path to tf2nnpkg
tf2nnpkg=$(which tf2nnpkg)
$tf2nnpkg --info "$indir/$tcname".info --graphdef "$indir/$tcname"."$model_type" \
"$tf_intf_version" -o "$outdir"
else
- $model2nnpkg -o "$outdir" "$indir/$tcname"."$model_type"
+ $model2nnpkg -o "$outdir" -m "$indir/$tcname"."$model_type"
fi
extensions="
--- /dev/null
+# qnf
+
+`qnf` is a tool to convert h5 tensor data between quantized (uint8) and float32 representations.
+
+It reads the quantization parameters (scale and zero point) from the given circle model file.
+
+## Prerequisite
+
+$ pip install -r requirements.txt
+
+## Usage
+
+```
+$ ./qnf.py -h
+$ python tools/nnpackage_tool/qnf/qnf.py -h
+usage: qnf.py [-h] [-o OUT_PATH] [-q | -d] h5 circle
+
+positional arguments:
+ h5 path to h5 file either input or output to model
+ circle path to quantized circle model
+
+optional arguments:
+ -h, --help show this help message and exit
+  -o OUT_PATH, --output OUT_PATH
+                        output file
+ -q, --quantize quantize f32 to q8u using circle input's qparam
+ (default: false)
+ -d, --dequantize dequantize q8u to f32 using circle output's qparam
+ (default: false)
+
+Examples:
+ qnf.py -q input.h5 0c/0.circle => generated quantized input as input_.h5
+ qnf.py -d output.h5 0c/0.circle => generated dequantized output as output_.h5
+  qnf.py -o out/out.h5 -d output.h5 0c/0.circle => generated dequantized output as out/out.h5
+```
--- /dev/null
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+from circle_schema import circle
+import h5py
+import numpy as np
+import os
+import sys
+
+# Maps a dtype name to the dtype string handed to h5py's create_dataset().
+# Only "float32" carries an explicit byte-order marker ('>' = big-endian).
+h5dtypes = {
+    "float32": ">f4",
+    "uint8": "u1",
+    "int8": "i1",
+    "bool": "u1",
+    "int32": "int32",
+    "int64": "int64"
+}
+
+
+def quantize(h5_path, circle_path, h5_out_path):
+    """Quantize the float32 data at '/value/0' of `h5_path` to uint8.
+
+    Scale and zero point are taken from the first input tensor of the first
+    subgraph of the circle model. The result is written to `h5_out_path` as
+    dataset 'value/0', with the tensor name stored in attribute '0' of
+    group 'name'.
+
+    Prints "Not f32 to q8u" and exits with status -1 unless the stored data
+    is float32 and that input tensor is UINT8.
+    """
+    with open(circle_path, 'rb') as model_file:
+        subgraph = circle.Model.GetRootAsModel(model_file.read(), 0).Subgraphs(0)
+    in_tensor = subgraph.Tensors(subgraph.Inputs(0))
+    in_name = in_tensor.Name()
+
+    with h5py.File(h5_path, 'r') as src:
+        values = np.array(src['/value/0'])
+
+    if not np.issubdtype(values.dtype,
+                         np.float32) or in_tensor.Type() != circle.TensorType.UINT8:
+        print("Not f32 to q8u")
+        sys.exit(-1)
+
+    # copied from python-tools/examples/pytorch_tutorial/main.py
+    dtype = 'uint8'
+    qparam = subgraph.Tensors(subgraph.Inputs(0)).Quantization()
+    # q = round(x / scale + zero_point), clipped to the uint8 range
+    rescaled = values / qparam.ScaleAsNumpy()[0] + qparam.ZeroPointAsNumpy()[0]
+    quantized = np.round(rescaled).clip(np.iinfo(dtype).min,
+                                        np.iinfo(dtype).max).astype(dtype)
+
+    ensure_output_dir(h5_out_path)
+    with h5py.File(h5_out_path, 'w') as dst:
+        names = dst.create_group("name")
+        data = dst.create_group("value")
+        key = str(0)
+        data.create_dataset(key, data=quantized, dtype=h5dtypes[dtype])
+        names.attrs[key] = in_name
+
+
+def dequantize(h5_path, circle_path, h5_out_path):
+    """Dequantize the uint8 data at '/value/0' of `h5_path` to float32.
+
+    Scale and zero point are taken from the first output tensor of the first
+    subgraph of the circle model. The result is written to `h5_out_path` as
+    dataset 'value/0' (dtype '>f4'), with the tensor name stored in attribute
+    '0' of group 'name'.
+
+    Prints "Not q8u to f32" and exits with status -1 unless the stored data
+    is uint8 and that output tensor is UINT8.
+    """
+    with open(circle_path, 'rb') as model_file:
+        subgraph = circle.Model.GetRootAsModel(model_file.read(), 0).Subgraphs(0)
+    out_tensor = subgraph.Tensors(subgraph.Outputs(0))
+    out_name = out_tensor.Name()
+
+    with h5py.File(h5_path, 'r') as src:
+        values = np.array(src['/value/0'])
+
+    if not np.issubdtype(values.dtype,
+                         np.uint8) or out_tensor.Type() != circle.TensorType.UINT8:
+        print("Not q8u to f32")
+        sys.exit(-1)
+
+    # copied from python-tools/examples/pytorch_tutorial/main.py
+    qparam = subgraph.Tensors(subgraph.Outputs(0)).Quantization()
+    # x = (q - zero_point) * scale
+    restored = (values.astype(np.float32) -
+                qparam.ZeroPointAsNumpy()[0]) * qparam.ScaleAsNumpy()[0]
+
+    ensure_output_dir(h5_out_path)
+    with h5py.File(h5_out_path, 'w') as dst:
+        names = dst.create_group("name")
+        data = dst.create_group("value")
+        key = str(0)
+        data.create_dataset(key, data=restored, dtype='>f4')
+        names.attrs[key] = out_name
+
+
+def makeArgParser():
+    """Build the command-line parser of qnf.
+
+    Positionals: `h5`, `circle`. Options: `-o/--output` (stored as
+    `out_path`) and the mutually exclusive flags `-q/--quantize` and
+    `-d/--dequantize`.
+    """
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        'h5', type=str, help='path to h5 file either input or output to model')
+    arg_parser.add_argument('circle', type=str, help='path to quantized circle model')
+    arg_parser.add_argument(
+        '-o', '--output', action='store', dest="out_path", help="output file")
+
+    # The two conversion directions cannot be combined in a single run.
+    direction = arg_parser.add_mutually_exclusive_group()
+    direction.add_argument(
+        '-q',
+        '--quantize',
+        action='store_true',
+        help="quantize f32 to q8u using circle input's qparam (default: false)")
+    direction.add_argument(
+        '-d',
+        '--dequantize',
+        action='store_true',
+        help="dequantize q8u to f32 using circle output's qparam (default: false)")
+    return arg_parser
+
+
+def parseArgs(parser=None):
+    """Parse sys.argv and return the resulting namespace.
+
+    Args:
+        parser: parser to use; when omitted a fresh one is built with
+            makeArgParser(), so the function no longer depends on a global
+            `parser` that exists only when the file runs as a script.
+    """
+    if parser is None:
+        parser = makeArgParser()
+    args = parser.parse_args()
+    return args
+
+
+def ensure_output_dir(out_path):
+    """Create the parent directory of `out_path` when the path names one.
+
+    A bare file name has no directory part, so nothing is created for it.
+    """
+    parent = os.path.dirname(out_path)
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+
+
+if __name__ == '__main__':
+    # Script entry point: convert one h5 file in the direction selected by
+    # -q or -d, using the qparams of the given circle model.
+    parser = makeArgParser()
+    args = parseArgs()
+
+    if not (args.quantize or args.dequantize):
+        # -q and -d are mutually exclusive but neither is mandatory for the
+        # parser; without one of them there is nothing to do, so say so
+        # instead of exiting successfully without producing output.
+        parser.error('one of -q/--quantize or -d/--dequantize is required')
+
+    h5_path, circle_path = args.h5, args.circle
+
+    if args.out_path:
+        out_path = args.out_path
+    else:
+        # Default output: input name with '_' inserted before the extension,
+        # e.g. input.h5 -> input_.h5
+        h5_name, ext = os.path.splitext(h5_path)
+        out_path = h5_name + '_' + ext
+
+    if args.quantize:
+        quantize(h5_path, circle_path, out_path)
+
+    if args.dequantize:
+        dequantize(h5_path, circle_path, out_path)
--- /dev/null
+numpy>=1.18.5
+circle-schema>=0.4.0.dev0
+h5py>=2.10.0
${flatc} -o ./ -b ${circle_schema} $name.$suffix.json.fused.datalayout
mv $name.$suffix.json.fused.circle $name.$suffix.circle
tools/nnpackage_tool/gen_golden/gen_golden.py $name.$suffix.pb
-tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh -o ${outdir} $name.$suffix.circle
+tools/nnpackage_tool/model2nnpkg/model2nnpkg.py -o ${outdir} -m $name.$suffix.circle
mkdir -p ${outdir}/$name.$suffix/metadata/tc
mv {input,expected}.h5 ${outdir}/$name.$suffix/metadata/tc/
mv $name.$suffix.{pb,tflite} ${outdir}/$name.$suffix/
└── input.h5
# @ target
-# run nnpkg with nnpackage_run and compare with h5diff
+# run nnpkg with onert_run and compare with h5diff
$ onert/test/onert-test nnpkg-test -i nnpkg-tcs cast
```
name=${tflite_basename%.*}
tools/nnpackage_tool/gen_golden/gen_golden.py $1
-tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh -o ${outdir} $1
+tools/nnpackage_tool/model2nnpkg/model2nnpkg.py -o ${outdir} -m $1
mkdir -p ${outdir}/$name/metadata/tc
mv {input,expected}.h5 ${outdir}/$name/metadata/tc/
cp $1 ${outdir}/$name/
The contents of the folder can be categorized into the following groups:
-- [Generator scripts to map decision variables to `nnpackage_run` parameters](#mapping-decision-to-parameters)
+- [Generator scripts to map decision variables to `onert_run` parameters](#mapping-decision-to-parameters)
- [Estimator scripts to compute pareto front](#pareto-estimation)
The following subsections describe the role of each script in detail.
## Mapping Decision to Parameters
The generator script `gen_oplist.py` is located under `generator` folder, and encodes large integer representations for `nnpackage` backend assignments. Effectively, it maps suitable backend assignments to integer values. For example, a graph with only three operations and two backends will have a integer representation in the range `(0, 7)`. Thus a value `0` might imply all operations run on `cpu`, while `7` might imply that all operations run on `acl_cl` backend. As will be described below, the integer representation of `nnpackage` parameters serves as a convenient decision space for pareto estimation.
-Setting up parameters for `nnpackage_run` requires a knowledge of model-specific operations. To this end, the `gen_oplist.py` script generates for each model, a `oplist` of unique operations. If an exhaustive mapping of backends to operation sequences is preferred, then `gen_oplist.py` also generates a so-called `opmap` list for uniquely observed `<operation name, data size>` pairs.
+Setting up parameters for `onert_run` requires a knowledge of model-specific operations. To this end, the `gen_oplist.py` script generates for each model, a `oplist` of unique operations. If an exhaustive mapping of backends to operation sequences is preferred, then `gen_oplist.py` also generates a so-called `opmap` list for uniquely observed `<operation name, data size>` pairs.
`gen_oplist.py` is run on the development environment (read: *Desktop PC*) as shown below:
```
For details, type `python brute_force_profiler.py --help`. Below is a example of the dump generated by the brute-force profiler:
```
-{"oplist": ["Pool2D", "BinaryArithmetic", "DepthwiseConv2D", "Conv2D", "Reshape"],
+{"oplist": ["Pool2D", "BinaryArithmetic", "DepthwiseConv2D", "Conv2D", "Reshape"],
"solutions": [
- {"memory": 56388, "id": 0, "time": 72.525},
- {"memory": 63624, "id": 1, "time": 86.532},
- {"memory": 64320, "id": 2, "time": 69.352},
- {"memory": 65376, "id": 3, "time": 76.436},
- {"memory": 73016, "id": 4, "time": 69.634},
- {"memory": 73492, "id": 5, "time": 47.013},
- {"memory": 74488, "id": 6, "time": 95.01},
- {"memory": 74844, "id": 7, "time": 111.329},
- {"memory": 393324, "id": 8, "time": 98.956},
- {"memory": 395088, "id": 9, "time": 103.24},
- {"memory": 396180, "id": 10, "time": 68.107},
- {"memory": 395932, "id": 11, "time": 86.109},
- {"memory": 402468, "id": 12, "time": 25.477},
- {"memory": 402800, "id": 13, "time": 25.42},
- {"memory": 403904, "id": 14, "time": 9.168},
- {"memory": 404476, "id": 15, "time": 7.801},
+ {"memory": 56388, "id": 0, "time": 72.525},
+ {"memory": 63624, "id": 1, "time": 86.532},
+ {"memory": 64320, "id": 2, "time": 69.352},
+ {"memory": 65376, "id": 3, "time": 76.436},
+ {"memory": 73016, "id": 4, "time": 69.634},
+ {"memory": 73492, "id": 5, "time": 47.013},
+ {"memory": 74488, "id": 6, "time": 95.01},
+ {"memory": 74844, "id": 7, "time": 111.329},
+ {"memory": 393324, "id": 8, "time": 98.956},
+ {"memory": 395088, "id": 9, "time": 103.24},
+ {"memory": 396180, "id": 10, "time": 68.107},
+ {"memory": 395932, "id": 11, "time": 86.109},
+ {"memory": 402468, "id": 12, "time": 25.477},
+ {"memory": 402800, "id": 13, "time": 25.42},
+ {"memory": 403904, "id": 14, "time": 9.168},
+ {"memory": 404476, "id": 15, "time": 7.801},
....
- {"memory": 403940, "id": 30, "time": 9.145},
+ {"memory": 403940, "id": 30, "time": 9.145},
{"memory": 403568, "id": 31, "time": 8.034}]}
```
```
{"configs": {
- "4": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=cpu ",
- "10": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
- "14": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
- "16": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
- "20": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
- "21": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
- "31": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=acl_cl "},
- "oplist": ["Pool2D", "DepthwiseConv2D", "Reshape", "Conv2D", "BinaryArithmetic"],
+ "4": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=cpu ",
+ "10": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
+ "14": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=cpu ",
+ "16": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=cpu OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "20": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=cpu OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "21": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=cpu OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=cpu OP_BACKEND_BinaryArithmetic=acl_cl ",
+ "31": "BACKENDS=\"acl_cl;cpu\" OP_BACKEND_Pool2D=acl_cl OP_BACKEND_DepthwiseConv2D=acl_cl OP_BACKEND_Reshape=acl_cl OP_BACKEND_Conv2D=acl_cl OP_BACKEND_BinaryArithmetic=acl_cl "},
+ "oplist": ["Pool2D", "DepthwiseConv2D", "Reshape", "Conv2D", "BinaryArithmetic"],
"solutions": [
- {"exec_time": 76.138, "max_rss": 62712, "id": 4},
- {"exec_time": 72.719, "max_rss": 65272, "id": 16},
- {"exec_time": 22.409, "max_rss": 403120, "id": 14},
- {"exec_time": 28.138, "max_rss": 403064, "id": 10},
- {"exec_time": 70.656, "max_rss": 65536, "id": 20},
- {"exec_time": 68.805, "max_rss": 66076, "id": 21},
+ {"exec_time": 76.138, "max_rss": 62712, "id": 4},
+ {"exec_time": 72.719, "max_rss": 65272, "id": 16},
+ {"exec_time": 22.409, "max_rss": 403120, "id": 14},
+ {"exec_time": 28.138, "max_rss": 403064, "id": 10},
+ {"exec_time": 70.656, "max_rss": 65536, "id": 20},
+ {"exec_time": 68.805, "max_rss": 66076, "id": 21},
{"exec_time": 8.201, "max_rss": 404656, "id": 31}], "mode": "name"}
```
**Note**: The pareto-estimation algorithms require the use of python `numpy` package, so make sure to install it beforehand.
if __name__ == "__main__":
parser = ProfileArgs(
- prog="brute_force_profiler.py", description="Profiles nnpackage_run using oplist")
+ prog="brute_force_profiler.py", description="Profiles onert_run using oplist")
# Parse arguments
args = parser.parse_args()
modelfile = args.model
super(ProfileArgs, self).__init__(args, kwargs)
self.add_argument(
'model', type=str, default=None, help='nnpackage name with path')
- self.add_argument('run_folder', type=str, help="path to nnpackage_run executable")
+ self.add_argument('run_folder', type=str, help="path to onert_run executable")
self.add_argument(
'--mode',
type=str.lower,
def run_inference(self, solution):
cmd_str = [
- ". /tmp/envvars.sh && " + self._run_folder + "/nnpackage_run -w1 -r1 -m1 -l "
- + self._model + "/metadata/tc/input.h5 " + self._model + " 2> /dev/null"
+ ". /tmp/envvars.sh && " + self._run_folder + "/onert_run -w1 -r1 -m1 -l " +
+ self._model + "/metadata/tc/input.h5 " + self._model + " 2> /dev/null"
]
res = exec_shell(cmd_str, newline_split=True)
try:
self.remote(remote_trace_path), local_trace_path])
def profile_backend(self, backend, backend_op_list):
- nnpkg_run_path = self.base_dir / 'out/bin/nnpackage_run'
+ nnpkg_run_path = self.base_dir / 'out/bin/onert_run'
nnpkg_path = self.base_dir / self.nnpkg_dir.name
cmd = ["ssh", f"{self.host}"]
Above selects operator index 11, 12, 13 in subgraph 1
+### Generating separate models for multi-model from a model file using the model generator
+
+To turn a single model into a multi-model package, you need the separate models and the input/output information of each of them.
+To produce both, run the model generator with the `--store-io-info` option.
+
+#### How to use
+
+```
+./select_operator.py <base model file> <opcode list txt file> <output file name> --store-io-info <output json file name>
+```
+
+#### Example
+
+This example splits one model into two separate models.
+
+```
+$ cat 0-26.txt
+0-26
+
+$ cat 27-30.txt
+27-30
+
+$ ./tools/tflitefile_tool/select_operator.py mobilenet_v1_1.0_224.tflite 0-26.txt m1.tflite --store-io-info m1.json
+Input tensor(s): [81]
+Output tensor(s): [44]
+Append subgraphs, orginal index : 0 , new index : 0
+
+$ ./tools/tflitefile_tool/select_operator.py mobilenet_v1_1.0_224.tflite 27-30.txt m2.tflite --store-io-info m2.json
+Input tensor(s): [6]
+Output tensor(s): [7]
+Append subgraphs, orginal index : 0 , new index : 0
+
+$ cat m1.json
+{"org-model-io": {"inputs": {"new-indices": [81]}, "outputs": {"new-indices": [-1]}}, "new-model-io": {"inputs": {"org-indices": [88], "new-indices": [81]}, "outputs": {"org-indices": [50], "new-indices": [44]}}}
+
+$ cat m2.json
+{"org-model-io": {"inputs": {"new-indices": [-1]}, "outputs": {"new-indices": [7]}}, "new-model-io": {"inputs": {"org-indices": [50], "new-indices": [6]}, "outputs": {"org-indices": [87], "new-indices": [7]}}}
+
+```
+The meaning of `m1.json` above is as follows:
+- The original model has 1 input and 1 output.
+ - Its only input is located at tensors[81] in the new model.
+ - Its only output has new-index -1, which means it is not part of the new model.
+- The new model has 1 input and 1 output.
+ - Its only input was located at tensors[88] in the original model, and is located at tensors[81] in the new model.
+ - Its only output was located at tensors[50] in the original model, and is located at tensors[44] in the new model.
+
+With the model files and inputs/outputs information files generated above, you can use `model2nnpkg.py` to create an nnpkg for multi-model.
+
## Colaboration model parser and model generator
1. Get imformation about base model using model parser
TYPE_TO_NPTYPE = {
- 'BOOL': np.bool,
+ 'BOOL': np.bool_,
'COMPLEX64': np.cdouble,
'FLOAT16': np.float16,
'FLOAT32': np.float32,
import tflite.BuiltinOptions
import argparse
import pkg_resources
+import json
# On flatbuffers 2.0, EndVector doesn't require length argument any more.
# The selected subgraph will be primary subgraph of the model to be created newly
selected_subgraph = sample_model.Subgraphs(args.subgraph)
- # k: old subg index, v: new subg index
+ # k: original subg index, v: new subg index
# new subg index is sequential in used_subgraphs_dic
for k, v in used_subgraphs_dic.items():
- print("Append subgraphs, old index : ", k, ", new index : ", v)
+ print("Append subgraphs, orginal index : ", k, ", new index : ", v)
if k == args.subgraph:
assert v == 0
new_subgraph = GenerateSubgraph(new_builder, selected_subgraph, operator_list,
return tflite.Model.ModelEnd(new_builder)
+def StoreIOInfo(path, used_tensors, org_inputs, org_outputs, new_inputs, new_outputs):
+ ioinfo = {}
+
+ # For inputs and outputs of org model
+ ioinfo["org-model-io"] = {
+ "inputs": {
+ "new-indices": []
+ },
+ "outputs": {
+ "new-indices": []
+ }
+ }
+ for input_tensor_idx in org_inputs:
+ if input_tensor_idx in used_tensors:
+ ioinfo["org-model-io"]["inputs"]["new-indices"].append(
+ used_tensors[input_tensor_idx])
+ else:
+ ioinfo["org-model-io"]["inputs"]["new-indices"].append(-1)
+ for output_tensor_idx in org_outputs:
+ if output_tensor_idx in used_tensors:
+ ioinfo["org-model-io"]["outputs"]["new-indices"].append(
+ used_tensors[output_tensor_idx])
+ else:
+ ioinfo["org-model-io"]["outputs"]["new-indices"].append(-1)
+
+ # For inputs and outputs of new model
+ ioinfo["new-model-io"] = {
+ "inputs": {
+ "org-indices": [],
+ "new-indices": []
+ },
+ "outputs": {
+ "org-indices": [],
+ "new-indices": []
+ }
+ }
+ for input_tensor_idx in new_inputs:
+ ioinfo["new-model-io"]["inputs"]["org-indices"].append(int(input_tensor_idx))
+ ioinfo["new-model-io"]["inputs"]["new-indices"].append(
+ used_tensors[input_tensor_idx])
+ for output_tensor_idx in new_outputs:
+ ioinfo["new-model-io"]["outputs"]["org-indices"].append(int(output_tensor_idx))
+ ioinfo["new-model-io"]["outputs"]["new-indices"].append(
+ used_tensors[output_tensor_idx])
+
+ with open(path, "w") as json_file:
+ json_file.write(f'{json.dumps(ioinfo, indent=2)}\n')
+
+
def main(args):
input_model_file = args.input_model
oplist_file = args.opcode_list
if tensor.Buffer() != 0:
used_buffers.append(tensor.Buffer())
+ used_buffers = list(set(used_buffers))
used_buffers.sort()
# Assign new index for operator
output_model_file.write(new_buf)
+ org_inputs = sample_subgraph.InputsAsNumpy()
+ org_outputs = sample_subgraph.OutputsAsNumpy()
+ if args.store_io_info != "":
+ StoreIOInfo(args.store_io_info, used_tensors_dic, org_inputs, org_outputs,
+ new_input_tensors, new_output_tensors)
+
if __name__ == '__main__':
# Define argument and read
"output_model", type=argparse.FileType('wb'), help="output tflite model file")
arg_parser.add_argument(
'-g', '--subgraph', type=int, default=0, help="subgraph to use (default: 0)")
+ arg_parser.add_argument(
+ '-s',
+ '--store-io-info',
+ type=str,
+ required=False,
+ default="",
+ help="Path to io information to be stored")
# TODO
# Select multiple subgraph