--- /dev/null
+version: 2
+test:
+ - name: NN Runtime
+ testCaseLanguage: CPP
+ testFW: GTEST
+ testCaseFolder:
+ - ./compute/test/cker
+ - ./runtime/onert/core/src/backend/cpu_common
+ - ./runtime/onert/frontend/nnapi
+ - ./runtime/onert/test/core/compiler
+ - ./runtime/onert/test/core/exec
+ - ./runtime/onert/test/core/interp
+ - ./runtime/onert/test/graph
+ - ./runtime/onert/test/graph/operand
+ - ./runtime/onert/test/graph/operation
+ - ./runtime/onert/test/graph/verifier
+ - ./runtime/onert/test/ir
+ - ./runtime/onert/test/util
+ - ./tests/nnapi/src
+ - ./tests/nnfw_api/src
+ - ./tests/tools/tflite_run/src
+
+ testFile:
+ - extension: cpp
+ any: true
+ - extension: cc
+ any: true
+
+ testCase:
+ - condition:
+ - functionName:
+ starts:
+ - TEST
+
+ negativeTestCase:
+ - condition:
+ - testName:
+ starts:
+ - neg_
+
+ positiveTestCase:
+ - condition:
+ - inverse: negativeTestCase
--exclude=build
--exclude=tags
--exclude=tests/scripts/framework/cache
+--exclude=tests/scripts/models/cache
--exclude=tools/cross/rootfs
--exclude=doxygen
*.pyc
# Test cache for model download
-/tests/scripts/framework/cache
+/tests/scripts/**/cache
# Test report
/report
install: $(TIMESTAMP_INSTALL)
-create_tar: runtime_tar_internal
+create_package: runtime_tar_internal
+
+create_acl_tar: acl_tar_internal
clean:
rm -rf $(WORKSPACE)
touch $(TIMESTAMP_INSTALL)
runtime_tar_internal: $(TIMESTAMP_BUILD) install_internal
- tar -zcf nnfw-package.tar.gz -C $(INSTALL_PATH) lib
- tar -zcf nnfw-dev-package.tar.gz -C $(INSTALL_PATH) include/nnfw
- tar -zcf nnfw-internal-dev-package.tar.gz -C $(INSTALL_PATH) include/onert
- mv nnfw-*package.tar.gz $(INSTALL_PATH)/.
+ tar -zcf $(WORKSPACE)/nnfw-package.tar.gz -C $(INSTALL_PATH) lib
+ tar -zcf $(WORKSPACE)/nnfw-devel-package.tar.gz -C $(INSTALL_PATH) include/nnfw
+ tar -zcf $(WORKSPACE)/nnfw-plugin-devel-package.tar.gz -C $(INSTALL_PATH) include/onert
+ tar -zcf $(WORKSPACE)/nnfw-test-package.tar.gz -C ${INSTALL_PATH} bin test unittest unittest_standalone
+
+acl_tar_internal: $(BUILD_FOLDER)
+ tar -zcf $(WORKSPACE)/nnfw-acl.tar.gz -C ${OVERLAY_FOLDER} lib
install_internal_acl:
# Workaround to install acl for test (ignore error when there is no file to copy)
--- /dev/null
+version: 2
+test:
+ - name: NN Compiler
+ testCaseLanguage: CPP
+ testFW: GTEST
+ testCaseFolder:
+ - ./angkor
+ - ./arser
+ - ./circle2circle
+ - ./circle-quantizer
+ - ./cwrap
+ - ./foder
+ - ./hermes
+ - ./hermes-std
+ - ./loco
+ - ./locomotiv
+ - ./locop
+ - ./logo
+ - ./logo-core
+ - ./luci
+ - ./luci-interpreter
+ - ./luci-value-test
+ - ./mio-circle
+ - ./mio-tflite
+ - ./oops
+ - ./pepper-assert
+ - ./pepper-str
+ - ./pepper-strcast
+ - ./pp
+ - ./record-minmax
+ - ./safemain
+ - ./souschef
+ - ./stdex
+ - ./tflite2circle
+
+ testFile:
+ - extension: .test.cpp
+ any: true
+
+ testCase:
+ - condition:
+ - functionName:
+ starts:
+ - TEST
+
+ negativeTestCase:
+ - condition:
+ - testName:
+ ends:
+ - _NEG
+
+ positiveTestCase:
+ - condition:
+ - inverse: negativeTestCase
--- /dev/null
+set(BCQ_TOOLS_FILES
+ generate_bcq_output_arrays
+ preserve_bcq_info
+)
+
+foreach(BCQ_TOOLS IN ITEMS ${BCQ_TOOLS_FILES})
+
+ set(BCQ_TOOLS_FILE ${BCQ_TOOLS})
+ set(BCQ_TOOLS_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${BCQ_TOOLS_FILE}")
+ set(BCQ_TOOLS_BIN "${CMAKE_CURRENT_BINARY_DIR}/${BCQ_TOOLS_FILE}")
+ set(BCQ_TOOLS_TARGET "${BCQ_TOOLS}_target")
+
+ add_custom_command(OUTPUT ${BCQ_TOOLS_BIN}
+ COMMAND ${CMAKE_COMMAND} -E copy "${BCQ_TOOLS_SRC}" "${BCQ_TOOLS_BIN}"
+ DEPENDS ${BCQ_TOOLS_SRC}
+ COMMENT "Generate ${BCQ_TOOLS_BIN}"
+ )
+
+ add_custom_target(${BCQ_TOOLS_TARGET} ALL DEPENDS ${BCQ_TOOLS_BIN})
+
+ install(FILES ${BCQ_TOOLS_BIN}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION bin)
+
+endforeach(BCQ_TOOLS)
--- /dev/null
+# BCQ Tools
+
+This directory includes tools related to BCQ.
+
+## preserve_bcq_info
+
+### Purpose
+
+`preserve_bcq_info` preserves constant nodes that carry BCQ information.
+When a `.pb` file is converted to a `.tflite` file by the TFLite converter, constant nodes with exactly the same values are removed and merged into a single representative node.
+This makes it impossible to know which constant node should be linked to the node we want to apply BCQ to.
+One solution is to make all identical constant nodes distinct by inserting unique values, and to ignore those newly generated values when BCQ fusing is applied.
+`preserve_bcq_info` generates and inserts unique dummy values into constant nodes with identical values so that they are not removed by the TensorFlow Lite converter.
+As a result, BCQ information is preserved.
+
+### How to use
+
+```bash
+preserve_bcq_info \
+--input_path /path/to/original_model.pb \
+--output_path /path/to/preserved_model.pb
+```
+
+### How it works
+
+If we append a unique dummy value to the end of each constant node, all the constant nodes become distinct. The following is an example.
+
+```
+[Original Constant Nodes]
+const(value=[1, 2, 3], name='const1')
+const(value=[1, 2, 3], name='const2')
+const(value=[1, 2, 3], name='const3')
+
+[After BCQ information preserved]
+const(value=[1, 2, 3, -1], name='const1')
+const(value=[1, 2, 3, -2], name='const2')
+const(value=[1, 2, 3, -3], name='const3')
+```
+
+Negative values are used as dummy values rather than positive ones,
+because positive values could be confused with the original constant node values.
+The unique dummy values start at -1 and continue with -2, -3, ..., -N, where N is the number of preserved constant nodes.
+
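+A minimal sketch of this numbering scheme (illustrative only; the bundled `preserve_bcq_info` script implements it with its `UniqueValueGen` helper):
+
+```python
+def append_dummy_values(constant_values):
+    """Append a unique negative dummy value to each constant so identical ones become distinct."""
+    preserved = []
+    next_dummy = -1  # dummy values count down: -1, -2, ..., -N
+    for values in constant_values:
+        preserved.append(list(values) + [next_dummy])
+        next_dummy -= 1
+    return preserved
+
+# [[1, 2, 3], [1, 2, 3]] -> [[1, 2, 3, -1], [1, 2, 3, -2]]
+```
+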
+### Caution
+
+- Newly generated dummy values should be ignored when the constant nodes are used.
+
+## generate_bcq_output_arrays
+
+### Purpose
+
+To apply BCQ, the BCQ information nodes must be designated as model outputs so that they survive TFLite conversion.
+However, there can be a large number of such nodes, and sometimes we cannot copy and paste all of them because the resulting string is too long.
+`generate_bcq_output_arrays` generates the output_arrays string, which includes the BCQ information nodes.
+
+### How to use
+
+```bash
+generate_bcq_output_arrays \
+--input_path /path/to/original_model.pb \
+--output_path /path/to/output_arrays.txt
+```
+
+### How it works
+
+```
+[Original BCQ information nodes]
+const(value=[1, 2, 3, -1], name='const1')
+const(value=[1, 2, 3, -2], name='const2')
+const(value=[1, 2, 3, -3], name='const3')
+
+[Generated output_arrays]
+,const1,const2,const3
+```
+
+### Caution
+
+- The generated output_arrays string starts with a comma.
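+
+A hypothetical consumer can simply drop the leading comma when reading the generated file (sketch only; how the string is actually fed to the converter depends on your setup):
+
+```python
+# Read the generated output_arrays string and split it into node names.
+with open("/path/to/output_arrays.txt") as f:
+    output_arrays = f.read()
+
+# ",const1,const2,const3" -> ["const1", "const2", "const3"]
+node_names = output_arrays.lstrip(",").split(",")
+print(node_names)
+```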
--- /dev/null
+#!/usr/bin/env python3
+
+import tensorflow as tf
+
+import argparse
+import sys
+
+
+def _get_parser():
+ """
+ Returns an ArgumentParser for generating output_arrays.
+ """
+ parser = argparse.ArgumentParser(
+ description=("Command line tool to generated output_arrays of BCQ nodes"))
+
+ # Input and output path.
+ parser.add_argument(
+ "-i",
+ "--input_path",
+ type=str,
+ help="Full filepath of the input file.",
+ required=True)
+ parser.add_argument(
+ "-o",
+ "--output_path",
+ type=str,
+ help="Full filepath of the output file.",
+ required=True)
+
+ return parser
+
+
+def load_graph(frozen_graph_filename):
+ """
+ Load graph from frozen pb file
+ """
+ with tf.compat.v1.gfile.GFile(frozen_graph_filename, "rb") as f:
+ graph_def = tf.compat.v1.GraphDef()
+ graph_def.ParseFromString(f.read())
+ with tf.Graph().as_default() as graph:
+ tf.import_graph_def(graph_def, name='')
+ return graph
+
+
+def dtype2str(dtype):
+ if dtype == "int32":
+ return "TF_INT32"
+ elif dtype == "int64":
+ return "TF_INT64"
+ elif dtype == "float32":
+ return "TF_FLOAT"
+ elif dtype == "bool":
+ return "TF_BOOL"
+ else:
+ raise Exception("Not supported dtype")
+
+
+def print_output_arrays(flags):
+ graph_model = load_graph(flags.input_path)
+ graph_model_def = graph_model.as_graph_def()
+ ops = graph_model.get_operations()
+
+ output_names = [op.outputs[0].name for op in ops
+ if op.type == "Const" and "bcqinfo_" in op.outputs[0].name]
+
+ output_arrays = ""
+ for output_name in output_names:
+ output_arrays += ","
+
+ colon_index = output_name.find(":")
+ if colon_index == -1:
+ output_arrays += output_name
+ else:
+ output_arrays += output_name[:colon_index]
+
+ f = open(flags.output_path, 'w')
+ f.write(output_arrays)
+ f.close()
+
+
+def main():
+ # Parse argument.
+ parser = _get_parser()
+ flags = parser.parse_known_args(args=sys.argv[1:])
+
+ print_output_arrays(flags[0])
+
+
+if __name__ == "__main__":
+ main()
--- /dev/null
+#!/usr/bin/env python3
+
+import tensorflow as tf
+import numpy as np
+
+import argparse
+import sys
+
+
+def _get_parser():
+ """
+ Returns an ArgumentParser for preserving BCQ information.
+ """
+ parser = argparse.ArgumentParser(
+ description=("Command line tool to preserve BCQ information"))
+
+ # Input and output path.
+ parser.add_argument(
+ "-i",
+ "--input_path",
+ type=str,
+ help="Full filepath of the input file.",
+ required=True)
+ parser.add_argument(
+ "-o",
+ "--output_path",
+ type=str,
+ help="Full filepath of the output file.",
+ required=True)
+
+ return parser
+
+
+def load_graph(frozen_graph_filename):
+ """
+ Load graph from frozen pb file
+ """
+ with tf.compat.v1.gfile.GFile(frozen_graph_filename, "rb") as f:
+ graph_def = tf.compat.v1.GraphDef()
+ graph_def.ParseFromString(f.read())
+ with tf.Graph().as_default() as graph:
+ tf.import_graph_def(graph_def, name='')
+ return graph
+
+
+def preserve_bcq_info(flags):
+ """
+ Generate unique dummy value from -1 to -N.
+
+ We use negative values to preserve BCQ information because
+ positive values may cause some confusion with real BCQ information values.
+ """
+
+ class UniqueValueGen:
+ def __init__(self):
+ self.unique_value = -1
+
+ def gen(self):
+ val = self.unique_value
+ self.unique_value = val - 1
+ return val
+
+ unique_value = UniqueValueGen()
+
+ original_graph_model = load_graph(flags.input_path)
+ original_graph_model_def = original_graph_model.as_graph_def()
+
+ new_graph = tf.compat.v1.GraphDef()
+ substitution_dict = {}
+
+ DT_INT32 = None # Just for copying DT_INT32 attribute value
+
+ for node in original_graph_model_def.node:
+ if node.op == "Const":
+ # Because bcqinfo_do_w_x is BOOL type, we cannot append a dummy value at the end.
+ # Therefore we convert its type to INT32.
+ if "/bcqinfo_do_w_x" in node.name:
+ original_tensor = tf.make_ndarray(node.attr["value"].tensor)
+ substitution_dict[node.name] = tf.make_tensor_proto(
+ [int(original_tensor[0]), unique_value.gen()], tf.int32)
+
+ preserved_bcqinfo_list = ["/bcqinfo_number_of_clusters", "/bcqinfo_size_of_clusters",
+ "/bcqinfo_qbits_of_clusters"]
+
+ if any(name in node.name for name in preserved_bcqinfo_list):
+ original_tensor = tf.make_ndarray(
+ node.attr["value"].tensor) # variable name change
+ substitution_dict[node.name] = tf.make_tensor_proto(
+ np.append(original_tensor, unique_value.gen()), tf.int32)
+ DT_INT32 = node.attr["dtype"]
+
+ for node in original_graph_model_def.node:
+ if node.name in substitution_dict:
+ new_node = new_graph.node.add()
+ new_node.op = "Const"
+ new_node.name = node.name
+ new_node.attr["dtype"].CopyFrom(DT_INT32)
+ new_node.attr["value"].tensor.CopyFrom(substitution_dict[node.name])
+ else:
+ new_node = new_graph.node.add()
+ new_node.CopyFrom(node)
+
+ tf.io.write_graph(new_graph, '.', flags.output_path, False)
+
+
+def main():
+ # Parse argument.
+ parser = _get_parser()
+ flags = parser.parse_known_args(args=sys.argv[1:])
+
+ # Generate a new pb file in which BCQ information is preserved.
+ preserve_bcq_info(flags[0])
+
+
+if __name__ == "__main__":
+ main()
target_link_libraries(circle-quantizer luci_pass)
target_link_libraries(circle-quantizer luci_export)
target_link_libraries(circle-quantizer arser)
+target_link_libraries(circle-quantizer vconone)
install(TARGETS circle-quantizer DESTINATION bin)
require("luci")
require("oops")
require("arser")
+require("vconone")
#include <oops/InternalExn.h>
#include <arser/arser.h>
+#include <vconone/vconone.h>
#include <functional>
#include <iostream>
using Algorithms = luci::CircleOptimizer::Options::Algorithm;
using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+void print_version(void)
+{
+ std::cout << "circle-quantizer version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
int entry(int argc, char **argv)
{
// Simple argument parser (based on map)
arser::Arser arser("circle-quantizer provides circle model quantization");
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
arser.add_argument(qdqw)
.nargs(3)
.type(arser::DataType::STR_VEC)
.required(false)
.help("Quantize-dequantize weight values required action before quantization. "
"Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer)");
+ "output_dtype(uint8) granularity(layer, channel)");
arser.add_argument(qwmm)
.nargs(3)
.required(false)
.help("Quantize with min/max values. "
"Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer)");
+ "output_dtype(uint8) granularity(layer, channel)");
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
+ }
+
+ if (arser["--tensors_to_hdf5"] == arser["--tensors"])
+ {
+ std::cout << "[Error] You must specify one option for how to print." << std::endl;
+ std::cout << arser;
+ return 255;
}
std::unique_ptr<circletensordump::DumpInterface> dump;
auto max = quant_param->max();
auto scale = quant_param->scale();
auto zero_point = quant_param->zero_point();
+ auto quantized_dimension = quant_param->quantized_dimension();
os << " " + print_format2 + " ├── min : ";
::print_comma_sepearted(os, min);
os << " " + print_format2 + " ├── scale : ";
::print_comma_sepearted(os, scale);
os << std::endl;
- os << " " + print_format2 + "Â Â â\94\94── zero_point : ";
+ os << " " + print_format2 + "Â Â â\94\9c── zero_point : ";
::print_comma_sepearted(os, zero_point);
os << std::endl;
+ os << " " + print_format2 + " └── quantized_dimension : " << quantized_dimension;
+ os << std::endl;
}
// buffer
}
/**
- * This function writes data to given hdf5 file like below.
+ * This function writes vector data to the given hdf5 file as shown below.
*
* GROUP "group_name"
* ㄴDATATYPE "type"
* ㄴDATA "data"
*/
template <typename T>
-void write_data_to_hdf5(H5::H5File &file, std::string &group_name, std::string dataset_name,
- const H5::PredType &type, const flatbuffers::Vector<T> *data,
- std::vector<hsize_t> dims)
+void write_vector_data_to_hdf5(H5::H5File &file, std::string &group_name, std::string dataset_name,
+ const H5::PredType &type, const flatbuffers::Vector<T> *data,
+ std::vector<hsize_t> dims)
{
if (data == nullptr)
return;
dataset->write(data->data(), type);
}
+/// @brief This function writes scalar data to the given hdf5 file
+template <typename T>
+void write_scalar_data_to_hdf5(H5::H5File &file, std::string &group_name, std::string dataset_name,
+ const H5::PredType &type, T data)
+{
+ auto dataspace = std::make_unique<H5::DataSpace>(H5S_SCALAR);
+ auto dataset = std::make_unique<H5::DataSet>(
+ file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
+ dataset->write(&data, type);
+}
+
} // namespace
namespace circletensordump
auto buff_data_ptr = reader.buffers()->Get(buff_idx)->data();
if (buff_data_ptr)
{
- ::write_data_to_hdf5(file, group_name, "weights", ::hdf5_dtype_cast(tensor->type()),
- buff_data_ptr, ::hdf5_dims_cast(buff_data_ptr, tensor->shape()));
+ ::write_vector_data_to_hdf5(file, group_name, "weights", ::hdf5_dtype_cast(tensor->type()),
+ buff_data_ptr,
+ ::hdf5_dims_cast(buff_data_ptr, tensor->shape()));
}
// write quantization parameters
if (quant_param)
{
auto min = quant_param->min();
- ::write_data_to_hdf5(file, group_name, "min", H5::PredType::NATIVE_FLOAT, min,
- ::hdf5_dims_cast(min));
+ ::write_vector_data_to_hdf5(file, group_name, "min", H5::PredType::NATIVE_FLOAT, min,
+ ::hdf5_dims_cast(min));
auto max = quant_param->max();
- ::write_data_to_hdf5(file, group_name, "max", H5::PredType::NATIVE_FLOAT, max,
- ::hdf5_dims_cast(max));
+ ::write_vector_data_to_hdf5(file, group_name, "max", H5::PredType::NATIVE_FLOAT, max,
+ ::hdf5_dims_cast(max));
auto scale = quant_param->scale();
- ::write_data_to_hdf5(file, group_name, "scale", H5::PredType::NATIVE_FLOAT, scale,
- ::hdf5_dims_cast(scale));
+ ::write_vector_data_to_hdf5(file, group_name, "scale", H5::PredType::NATIVE_FLOAT, scale,
+ ::hdf5_dims_cast(scale));
auto zero_point = quant_param->zero_point();
- ::write_data_to_hdf5(file, group_name, "zero_point", H5::PredType::NATIVE_INT64, zero_point,
- ::hdf5_dims_cast(zero_point));
+ ::write_vector_data_to_hdf5(file, group_name, "zero_point", H5::PredType::NATIVE_INT64,
+ zero_point, ::hdf5_dims_cast(zero_point));
+ auto quantized_dimension = quant_param->quantized_dimension();
+ ::write_scalar_data_to_hdf5(file, group_name, "quantized_dimension",
+ H5::PredType::NATIVE_INT32, quantized_dimension);
}
}
}
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
auto verifier = std::make_unique<VerifyFlatbuffers>();
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
-list(APPEND REQUIRED_TARGETS circlechef)
list(APPEND REQUIRED_TARGETS circle-inspect)
list(APPEND REQUIRED_TARGETS circle-verify)
list(APPEND REQUIRED_TARGETS circle2circle)
list(APPEND REQUIRED_TARGETS dredd_rule_lib)
-list(APPEND REQUIRED_TARGETS tflchef)
-list(APPEND REQUIRED_TARGETS tflite2circle)
TargetRequire_Return(${REQUIRED_TARGETS})
-nncc_find_resource(TensorFlowLiteRecipes)
-nncc_find_resource(CircleRecipes)
-
-set(TFLITE_RECIPE_REPO "${TensorFlowLiteRecipes_DIR}")
-set(CIRCLE_RECIPE_REPO "${CircleRecipes_DIR}")
-unset(RECIPE_REPO)
-
-set(TEST_RECIPE_FILENAME "test.recipe")
-set(TEST_RULE_FILENAME "test.rule")
-
unset(TEST_DEPS)
unset(TEST_NAMES)
set(oneValueArgs "")
set(multiValueArgs PASS)
-macro(Add RECIPE)
- if(NOT EXISTS "${TFLITE_RECIPE_REPO}/${RECIPE}/test.recipe")
- if(NOT EXISTS "${CIRCLE_RECIPE_REPO}/${RECIPE}/test.recipe")
- message(FATAL_ERROR "Missing recipe of '${RECIPE}' test")
- else()
- set(RECIPE_REPO ${CIRCLE_RECIPE_REPO})
- endif()
- else()
- set(RECIPE_REPO ${TFLITE_RECIPE_REPO})
- endif()
-
- if(NOT EXISTS "${RECIPE_REPO}/${RECIPE}/test.rule")
- message(FATAL_ERROR "Missing rule of '${RECIPE}' test")
- endif()
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+macro(Add RECIPE)
cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
unset(OPT_OPTIONS)
foreach(src ${ARG_PASS})
list(APPEND OPT_OPTIONS "--${src}")
endforeach(src ${ARG_PASS})
- set(RECIPE_FILE "${RECIPE}.recipe")
- set(RECIPE_SOURCE_PATH "${RECIPE_REPO}/${RECIPE}/${TEST_RECIPE_FILENAME}")
- set(RECIPE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE_FILE}")
-
- set(RULE_FILE "${RECIPE}.rule")
- set(RULE_SOURCE_PATH "${RECIPE_REPO}/${RECIPE}/${TEST_RULE_FILENAME}")
- set(RULE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RULE_FILE}")
-
- set(TFLITE_FILE "${RECIPE}.tflite")
- set(TFLITE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TFLITE_FILE}")
-
set(CIRCLE_FILE "${RECIPE}.circle")
- set(CIRCLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${CIRCLE_FILE}")
+ set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${CIRCLE_FILE}")
set(OPT_CIRCLE_FILE "${RECIPE}.opt.circle")
set(OPT_CIRCLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${OPT_CIRCLE_FILE}")
- # Copy .recipe
- add_custom_command(OUTPUT ${RECIPE_BINARY_PATH}
- COMMAND ${CMAKE_COMMAND} -E copy "${RECIPE_SOURCE_PATH}" "${RECIPE_BINARY_PATH}"
- DEPENDS ${RECIPE_SOURCE_PATH}
- COMMENT "Generate ${RECIPE_FILE}"
- )
-
- # Copy .rule
- add_custom_command(OUTPUT ${RULE_BINARY_PATH}
- COMMAND ${CMAKE_COMMAND} -E copy "${RULE_SOURCE_PATH}" "${RULE_BINARY_PATH}"
- DEPENDS ${RULE_SOURCE_PATH}
- COMMENT "Generate ${RULE_FILE}"
- )
-
- if(${RECIPE_REPO} STREQUAL ${TFLITE_RECIPE_REPO})
- # Generate .tflite
- add_custom_command(OUTPUT ${TFLITE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:tflchef-file> ${RECIPE_BINARY_PATH} ${TFLITE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:tflchef-file> ${RECIPE_BINARY_PATH}
- COMMENT "Generate ${TFLITE_FILE}"
- )
-
- # Generate .circle
- add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH} ${CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH}
- COMMENT "Generate ${CIRCLE_FILE}"
- )
-
- list(APPEND TEST_DEPS ${TFLITE_OUTPUT_PATH})
- else()
- # Generate .circle
- add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH} ${CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH}
- COMMENT "Generate ${CIRCLE_FILE}"
- )
- endif()
-
# Generate optimized .circle
add_custom_command(OUTPUT ${OPT_CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circle2circle> ${OPT_OPTIONS} ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_OUTPUT_PATH}
+ COMMAND $<TARGET_FILE:circle2circle> ${OPT_OPTIONS} ${CIRCLE_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
+ DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_PATH}
COMMENT "Generate ${OPT_CIRCLE_FILE}"
)
- list(APPEND TEST_DEPS ${RECIPE_BINARY_PATH} ${RULE_BINARY_PATH}
- ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH})
+ list(APPEND TEST_DEPS ${OPT_CIRCLE_OUTPUT_PATH})
list(APPEND TEST_NAMES ${RECIPE})
endmacro(Add)
# Generate dependencies
add_custom_target(circle2circle_dredd_recipe_test ALL DEPENDS ${TEST_DEPS})
+add_dependencies(circle2circle_dredd_recipe_test common_artifacts_deps)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
# Run tests
add_test(
NAME circle2circle_dredd_recipe_test
COMMAND "${TEST_RUNNER}"
"${TEST_CONFIG}"
- "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
${TEST_NAMES}
)
-require("circlechef")
require("circle2circle")
require("circle-inspect")
require("circle-verify")
+require("common-artifacts")
require("dredd-rule-lib")
-require("tflchef")
-require("tflite2circle")
## TFLITE RECIPE
Add(Net_InstanceNorm_001 PASS fuse_instnorm)
-# Add(Net_InstanceNorm_002 PASS fuse_instnorm)
+Add(Net_InstanceNorm_002 PASS fuse_instnorm)
Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul)
Add(MatMul_000 PASS resolve_customop_matmul)
+Add(DepthwiseConv2D_003 PASS)
## CIRCLE RECIPE
exit 255
fi
+WORKDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
CONFIG_PATH="$1"; shift
-WORKDIR="$1"; shift
+RESOURCE_DIR="$1"; shift
source "${CONFIG_PATH}"
echo "-- Found circle-inspect: ${CIRCLE_INSPECT_PATH}"
echo "-- Found circle-verify: ${CIRCLE_VERIFY_PATH}"
echo "-- Found circle2circle: ${CIRCLE2CIRCLE_PATH}"
-echo "-- Found workdir: ${WORKDIR}"
+echo "-- Found common-artifacts: ${RESOURCE_DIR}"
TESTED=()
PASSED=()
FAILED=()
-pushd "${WORKDIR}"
+pushd ${WORKDIR}
while [[ $# -ne 0 ]]; do
PREFIX="$1"; shift
cat > "${PREFIX}.log" <(
exec 2>&1
- echo "-- Found tflite: ${PREFIX}.tflite"
+ echo "-- Found circle: ${PREFIX}.opt.circle"
# Exit immediately if any command fails
set -e
set +x
# (COMPILED_FILE, INSPECT_PROG_PATH, VERIFY_PROG_PATH, ERROR_LOG) must be set for rule-lib.sh
- COMPILED_FILE="${WORKDIR}/${PREFIX}.opt.circle"
+ COMPILED_FILE="${PREFIX}.opt.circle"
INSPECT_PROG_PATH=${CIRCLE_INSPECT_PATH}
VERIFY_PROG_PATH=${CIRCLE_VERIFY_PATH}
ERROR_LOG="${PREFIX}.error"
trap 'echo "** ERROR **" ; cat "${ERROR_LOG}"' ERR
source rule-lib.sh
- source "${PREFIX}.rule"
+ source "${RESOURCE_DIR}/${PREFIX}.rule"
# unset
trap - ERR
target_link_libraries(circle2circle luci_pass)
target_link_libraries(circle2circle luci_export)
target_link_libraries(circle2circle arser)
+target_link_libraries(circle2circle vconone)
install(TARGETS circle2circle DESTINATION bin)
target_link_libraries(circle2circle_test luci_pass)
target_link_libraries(circle2circle_test luci_export)
target_link_libraries(circle2circle_test arser)
+target_link_libraries(circle2circle_test vconone)
require("hermes-std")
require("luci")
require("arser")
+require("vconone")
#include <oops/InternalExn.h>
#include <arser/arser.h>
+#include <vconone/vconone.h>
#include <functional>
#include <iostream>
using Algorithms = luci::CircleOptimizer::Options::Algorithm;
using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+void print_version(void)
+{
+ std::cout << "circle2circle version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
int entry(int argc, char **argv)
{
// Simple argument parser (based on map)
arser::Arser arser("circle2circle provides circle model optimization and transformations");
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
arser.add_argument("--all").nargs(0).required(false).default_value(false).help(
"Enable all optimize options");
add_subdirectory(circle)
# Tools
add_subdirectory(tools)
-add_subdirectory(tests)
+if(ENABLE_TEST)
+ add_subdirectory(tests)
+endif(ENABLE_TEST)
for (uint32_t idx = 0; idx < quant->zero_point()->size(); ++idx)
chef_quant->add_zero_point(quant->zero_point()->Get(idx));
}
+ circlechef::TensorQuantization *chef_quant = operand->mutable_quant();
+ chef_quant->set_quantized_dimension(quant->quantized_dimension());
}
}
quant_builder.add_min(quant_min);
quant_builder.add_scale(quant_scale);
quant_builder.add_zero_point(quant_zero_point);
+ quant_builder.add_quantized_dimension(quant.quantized_dimension());
// Update QuantizationParameters Index
quant_index = quant_builder.Finish();
repeated float max = 2;
repeated float scale = 3;
repeated int64 zero_point = 4;
+ optional int32 quantized_dimension = 5 [default = 0];
}
message Operand {
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
int32_t model_version = 1;
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
std::string circle_path = arser.get<std::string>("circle");
{
std::cout << err.what() << '\n';
std::cout << arser;
- return 0;
+ return 255;
}
std::string circle_path = arser.get<std::string>("circle");
}
};
+class UniquePrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_UniqueOptions())
+ {
+ os << " ";
+ os << "idx_out_type(" << EnumNameTensorType(params->idx_out_type()) << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class WhilePrinter : public OpPrinter
{
public:
_op_map[circle::BuiltinOperator_MAX_POOL_2D] = make_unique<Pool2DPrinter>();
_op_map[circle::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
_op_map[circle::BuiltinOperator_MUL] = make_unique<MulPrinter>();
+ // There is no Option for NON_MAX_SUPPRESSION_V4
_op_map[circle::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
_op_map[circle::BuiltinOperator_PACK] = make_unique<PackPrinter>();
// There is no Option for PAD
+ // There is no Option for PADV2
// There is no Option for PRELU
// There is no Option for RELU
// There is no Option for RELU6
_op_map[circle::BuiltinOperator_SUM] = make_unique<ReducerPrinter>();
_op_map[circle::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
// There is no Option for TOPK_V2
+ _op_map[circle::BuiltinOperator_UNIQUE] = make_unique<UniquePrinter>();
_op_map[circle::BuiltinOperator_WHILE] = make_unique<WhilePrinter>();
_op_map[circle::BuiltinOperator_CUSTOM] = make_unique<CustomOpPrinter>();
endif()
# Create python virtual environment with tensorflow 1.13.2
-set(VIRTUALENV_OVERLAY "${NNCC_OVERLAY_DIR}/venv_1_13_2")
+set(VIRTUALENV_OVERLAY_TF_1_13_2 "${NNCC_OVERLAY_DIR}/venv_1_13_2")
+
+# Create python virtual environment with tensorflow 2.3.0
+set(VIRTUALENV_OVERLAY_TF_2_3_0 "${NNCC_OVERLAY_DIR}/venv_2_3_0")
+
+add_custom_command(
+ OUTPUT ${VIRTUALENV_OVERLAY_TF_1_13_2}
+ COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_1_13_2}
+)
add_custom_command(
- OUTPUT ${VIRTUALENV_OVERLAY}
- COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY}
+ OUTPUT ${VIRTUALENV_OVERLAY_TF_2_3_0}
+ COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_3_0}
)
# Create requirements.txt and install required pip packages
set(REQUIREMENTS_FILE "requirements.txt")
-set(REQUIREMENTS_OVERLAY_PATH "${NNCC_OVERLAY_DIR}/${REQUIREMENTS_FILE}")
+set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}")
+set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}")
add_custom_command(
- OUTPUT ${REQUIREMENTS_OVERLAY_PATH}
- COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH}
- COMMAND ${VIRTUALENV_OVERLAY}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
- COMMAND ${VIRTUALENV_OVERLAY}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH} --upgrade
- DEPENDS ${VIRTUALENV_OVERLAY} ${REQUIREMENTS_OVERLAY_PATH}
+ OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
+ COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
+ COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} --upgrade
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
)
-add_custom_target(common_artifacts_python_deps ALL
- DEPENDS ${VIRTUALENV_OVERLAY} ${REQUIREMENTS_OVERLAY_PATH}
+add_custom_command(
+ OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.3.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} --upgrade
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0}
)
-# TODO Create python virtual environment with tensorflow 2.3.0-rc0
+add_custom_target(common_artifacts_python_deps ALL
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2} ${VIRTUALENV_OVERLAY_TF_2_3_0} ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+)
#[[ Generate common resources ]]
# TODO add pbtxt
add_executable(testDataGenerator ${SOURCES})
target_include_directories(testDataGenerator PRIVATE ${HDF5_INCLUDE_DIRS})
target_link_libraries(testDataGenerator PRIVATE ${HDF5_CXX_LIBRARIES})
+target_link_libraries(testDataGenerator PRIVATE arser)
target_link_libraries(testDataGenerator PRIVATE foder)
target_link_libraries(testDataGenerator PRIVATE luci_import)
target_link_libraries(testDataGenerator PRIVATE luci_interpreter)
#[[ optimize : Exclude from circle optimization(circle2circle) ]]
## TensorFlowLiteRecipes
-optimize(ReLU6_000)
-optimize(Where_000)
-optimize(Where_001)
+optimize(Unique_000)
+optimize(Unique_001)
+optimize(Unique_002)
+optimize(Unique_003)
+optimize(Unique_U8_000)
+optimize(Unique_U8_001)
## CircleRecipes
tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_U8_000)
+tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet
tcgenerate(Div_000)
tcgenerate(ELU_000)
tcgenerate(Equal_000)
tcgenerate(MaxPool2D_U8_000)
tcgenerate(Mean_U8_000)
tcgenerate(Minimum_000)
+tcgenerate(NonMaxSuppressionV4_000)
+tcgenerate(NonMaxSuppressionV4_001)
tcgenerate(MirrorPad_000)
tcgenerate(Mul_U8_000)
tcgenerate(Neg_000)
tcgenerate(Net_Dangle_001)
tcgenerate(Net_InstanceNorm_001)
tcgenerate(Net_InstanceNorm_002)
-tcgenerate(Net_ZeroDim_001) # fix luci
+tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim
tcgenerate(NotEqual_000)
tcgenerate(OneHot_000)
tcgenerate(OneHot_001)
tcgenerate(Pack_000)
tcgenerate(Pack_U8_000)
tcgenerate(Pad_U8_000)
+tcgenerate(PadV2_000)
tcgenerate(Pow_000)
tcgenerate(PRelu_000)
tcgenerate(Range_000)
tcgenerate(ReduceProd_002)
tcgenerate(ReduceProd_003)
tcgenerate(ReLU_000)
-tcgenerate(ReLU6_000) # luci NYI
+tcgenerate(ReLU6_000)
tcgenerate(ReLUN1To1_000)
-tcgenerate(Reshape_003) # fix luci
+tcgenerate(Reshape_003) # luci-interpreter doesn't support reshape without built-in option
tcgenerate(Reshape_U8_000)
tcgenerate(ResizeBilinear_000)
+tcgenerate(ResizeBilinear_U8_000) # luci-interpreter
tcgenerate(ResizeNearestNeighbor_000)
tcgenerate(ReverseSequence_000)
tcgenerate(ReverseV2_000)
tcgenerate(SpaceToBatchND_003)
tcgenerate(SpaceToDepth_000)
tcgenerate(SparseToDense_000)
-tcgenerate(SplitV_000) # fix luci
+tcgenerate(SplitV_000)
tcgenerate(Sqrt_000)
tcgenerate(Square_000)
tcgenerate(SquaredDifference_000)
tcgenerate(Tanh_000)
tcgenerate(Tile_000)
tcgenerate(Tile_U8_000)
-tcgenerate(TopKV2_000) # fix luci
-tcgenerate(TopKV2_001) # fix luci
-tcgenerate(TransposeConv_000) # fix interpreter
+tcgenerate(TopKV2_000)
+tcgenerate(TopKV2_001)
tcgenerate(Unique_000)
tcgenerate(Unique_001)
tcgenerate(Unique_002)
tcgenerate(Unique_003)
tcgenerate(Unique_U8_000)
tcgenerate(Unique_U8_001)
-tcgenerate(Where_000) # luci NYI
-tcgenerate(Where_001) # luci NYI
-tcgenerate(While_000) # fix luci
+tcgenerate(Where_000)
+tcgenerate(Where_001)
+tcgenerate(While_000)
tcgenerate(While_001)
tcgenerate(While_002)
tcgenerate(While_003)
-tcgenerate(YUV_TO_RGB_000) # fix luci
+tcgenerate(YUV_TO_RGB_000)
tcgenerate(YUV_TO_RGB_U8_000)
tcgenerate(ZerosLike_000)
+require("arser")
require("circle2circle")
require("circlechef")
require("foder")
* limitations under the License.
*/
+#include <arser/arser.h>
#include <foder/FileLoader.h>
#include <luci/Importer.h>
#include <luci_interpreter/Interpreter.h>
}
}
-void fill_random_data(void *data, uint32_t size, loco::DataType dtype)
+void fill_random_data(void *data, uint32_t size, loco::DataType dtype, uint32_t seed)
{
- std::random_device rd; // used to obtain a seed for the random number engine
- std::mt19937 gen(rd()); // standard mersenne_twister_engine seeded with rd()
+ std::mt19937 gen(seed); // standard mersenne_twister_engine seeded with the given seed
switch (dtype)
{
int entry(int argc, char **argv)
{
- std::string circle_file{argv[1]};
+ arser::Arser arser;
+ arser.add_argument("circle").type(arser::DataType::STR).help("Circle file you want to test");
+ arser.add_argument("--fixed_seed")
+ .required(false)
+ .nargs(0)
+ .help("Put a fixed seed into the random number generator");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cout << err.what() << std::endl;
+ std::cout << arser;
+ return 255;
+ }
+
+ std::string circle_file = arser.get<std::string>("circle");
size_t last_dot_index = circle_file.find_last_of(".");
std::string prefix = circle_file.substr(0, last_dot_index);
std::unique_ptr<H5::Group> output_value_group =
std::make_unique<H5::Group>(output_file.createGroup("value"));
+ std::random_device rd; // used to obtain a seed for the random number engine
uint32_t input_index = 0;
for (uint32_t g = 0; g < circle_model->subgraphs()->size(); g++)
{
std::vector<int8_t> data(byte_size);
// generate random data
- fill_random_data(data.data(), data_size, input_node->dtype());
+ if (arser["--fixed_seed"])
+ fill_random_data(data.data(), data_size, input_node->dtype(), 0);
+ else
+ fill_random_data(data.data(), data_size, input_node->dtype(), rd());
dataset->write(data.data(), dtype);
#include <gtest/gtest.h>
-TEST(HermesTest, simple_usecase)
+namespace
{
- // TO BE FILLED
+
+class Logger final : public hermes::Source
+{
+public:
+ Logger(hermes::Context *ctx);
+ ~Logger();
+};
+
+Logger::Logger(hermes::Context *ctx) { activate(ctx->sources(), ctx->bus()); }
+Logger::~Logger() { deactivate(); }
+
+} // namespace
+
+TEST(HermesTest, logger_constructor_NEG)
+{
+ hermes::Context context;
+ // we expect a segmentation fault from nullptr->sources()
+ ASSERT_DEATH(Logger logger(&context), "");
+
+ SUCCEED();
}
+
+// TODO add HermesTest simple_usecase
}
} // namespace
-TEST(NodeExecution_BiasEncode, s32) { test<int32_t>(); }
+TEST(NodeExecution_BiasEncode, s32)
+{
+ test<int32_t>();
+
+ SUCCEED();
+}
-TEST(NodeExecution_BiasEncode, f32) { test<float>(); }
+TEST(NodeExecution_BiasEncode, f32)
+{
+ test<float>();
+
+ SUCCEED();
+}
};
run_test<float>(lhs, rhs, out, Shape{2, 3}, Shape{3, 3}, Shape{2, 3}, loco::DataType::FLOAT32);
+
+ SUCCEED();
}
/* from the code below:
};
run_test<int32_t>(lhs, rhs, out, Shape{4, 2}, Shape{2, 6}, Shape{4, 6}, loco::DataType::S32);
+
+ SUCCEED();
}
// clang-format on
// TODO Validate the output (when the implementation becomes stable)
std::cout << locop::fmt<locop::LinearV1>(g) << std::endl;
+
+ SUCCEED();
}
TEST(LinearV1FormatterTest, user_defined_node_summary_builder)
tensor_shape->dim(0) = 4;
std::cout << fmt<TensorShapeFormat::Bracket>(tensor_shape.get()) << std::endl;
+
+ SUCCEED();
}
//
// Note that due to historical and performance reasons, per-tensor quantization uses unsigned
// integer types, while per-channel uses signed types assuming 'zero_point' == 0.
-//
-// TODO Add 'quantized_dimension' field for per-channel case when IR provides it.
struct AffineQuantization
{
std::vector<float> scale;
std::vector<int32_t> zero_point;
+ int32_t quantized_dimension;
};
class Tensor
return _quantization.zero_point[0];
}
+ const std::vector<float> &scales() const { return _quantization.scale; }
+
+ const std::vector<int32_t> &zero_points() const { return _quantization.zero_point; }
+
+ int32_t quantized_dimension() const { return _quantization.quantized_dimension; }
+
template <typename T> const T *data() const { return reinterpret_cast<const T *>(_data.get()); }
template <typename T> T *data() { return reinterpret_cast<T *>(_data.get()); }
Activation activation;
};
+struct DepthToSpaceParams
+{
+ int block_size;
+};
+
struct DepthwiseConv2DParams
{
Padding padding;
void Add::configure()
{
- assert(input1()->element_type() == input2()->element_type());
+ if (input1()->element_type() != input2()->element_type())
+ {
+ throw std::runtime_error("Input Tensor Data Type Mismatch.");
+ }
output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
}
}
}
+TEST(AddTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(AddTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2});
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
Concatenation.cpp
Conv2D.h
Conv2D.cpp
+ DepthToSpace.h
+ DepthToSpace.cpp
DepthwiseConv2D.h
DepthwiseConv2D.cpp
Elu.h
Pad.cpp
Reshape.h
Reshape.cpp
+ Reverse.h
+ Reverse.cpp
+ Slice.h
+ Slice.cpp
Softmax.h
Softmax.cpp
SpaceToDepth.h
AveragePool2D.test.cpp
Concatenation.test.cpp
Conv2D.test.cpp
+ DepthToSpace.test.cpp
DepthwiseConv2D.test.cpp
Elu.test.cpp
FullyConnected.test.cpp
Mul.test.cpp
Pad.test.cpp
Reshape.test.cpp
+ Reverse.test.cpp
+ Slice.test.cpp
Softmax.test.cpp
SpaceToDepth.test.cpp
Split.test.cpp
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpace.h"
+#include "Utils.h"
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
+ : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+{
+}
+
+void DepthToSpace::configure()
+{
+ if (input()->shape().num_dims() != 4)
+ {
+ throw std::runtime_error("Invalid input num_dims.");
+ }
+ if (output()->element_type() != DataType::FLOAT32 && output()->element_type() != DataType::U8 &&
+ output()->element_type() != DataType::S8 && output()->element_type() != DataType::S32 &&
+ output()->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Invalid output type");
+ }
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Type mismatch on input and output.");
+ }
+ const int block_size = params().block_size;
+ const int32_t input_height = input()->shape().dim(1);
+ const int32_t input_width = input()->shape().dim(2);
+ const int32_t input_channels = input()->shape().dim(3);
+ int32_t output_height = input_height * block_size;
+ int32_t output_width = input_width * block_size;
+ int32_t output_channels = input_channels / block_size / block_size;
+
+ assert(input_height == output_height / block_size);
+ assert(input_width == output_width / block_size);
+ assert(input_channels == output_channels * block_size * block_size);
+
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = output_height;
+ output_shape.dim(2) = output_width;
+ output_shape.dim(3) = output_channels;
+
+ output()->resize(output_shape);
+}
+
+void DepthToSpace::execute() const
+{
+ tflite::DepthToSpaceParams op_params;
+ op_params.block_size = params().block_size;
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported Type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class DepthToSpace : public KernelWithParams<DepthToSpaceParams>
+{
+public:
+ DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthToSpace.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class DepthToSpaceTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(DepthToSpaceTest, DataTypes);
+
+TYPED_TEST(DepthToSpaceTest, SimpleCase)
+{
+ std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 1, 2, 4};
+ std::vector<TypeParam> output_data{1, 2, 5, 6, 3, 4, 7, 8};
+ std::vector<int32_t> output_shape{1, 2, 4, 1};
+
+ Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ DepthToSpaceParams params{};
+ params.block_size = 2;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
ElementsAreArray(ArrayFloatNear(ref_output_data)));
}
-TEST(L2NormalizeTest, Uint8Quantized)
-{
- // TODO
- // Implement GetDequantizedOutput Function.
- // Create Test for Uint8 Case
-}
+// TODO Uint8Quantized
+// Implement GetDequantizedOutput Function.
+// Create Test for Uint8 Case
} // namespace
} // namespace kernels
1.0f, -0.5f, -1.0f, // Row 2
},
/*alpha=*/0.5f, getElementType<float>());
-}
-TEST(LeakReluTest, Uint8Simple)
-{
- // TODO
- // Implement GetDequantizedOutput Function.
- // Create Test for Uint8 Case
+ SUCCEED();
}
+// TODO Uint8Simple
+// Implement GetDequantizedOutput Function.
+// Create Test for Uint8 Case
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
// TODO make a Shape checking of output_tensor.
}
-TEST(LogisticTest, Uint8)
-{
- // Need to Implement GetDequantizedOutput Function.
-}
+// TODO Uint8
+// Need to Implement GetDequantizedOutput Function.
} // namespace
} // namespace kernels
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reverse.h"
+#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Reverse::Reverse(const Tensor *input, const Tensor *axes, Tensor *output)
+ : Kernel({input, axes}, {output})
+{
+}
+
+void Reverse::configure()
+{
+ assert(axes()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() >= axes()->shape().num_elements());
+ if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 &&
+ input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 &&
+ input()->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Unsupported input type.");
+ }
+ if (axes()->element_type() != DataType::S32)
+ {
+ throw std::runtime_error("Unsupported axes type.");
+ }
+ if (axes()->shape().num_elements() > 1)
+ {
+ throw std::runtime_error("Current implementation does not support more than 1 axis.");
+ }
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ if (axis_value < 0 || axis_value >= input()->shape().num_dims())
+ {
+ throw std::runtime_error("Invalid axes value");
+ }
+ assert(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
+void Reverse::execute() const
+{
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::Reverse<uint8_t>(
+ axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REVERSE_H
+#define LUCI_INTERPRETER_KERNELS_REVERSE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Reverse : public Kernel
+{
+public:
+ Reverse(const Tensor *input, const Tensor *axes, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REVERSE_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reverse.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class ReverseTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(ReverseTest, DataTypes);
+
+TYPED_TEST(ReverseTest, MultiDimensions)
+{
+ // TypeParam
+ std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
+ Shape input_shape{4, 3, 2};
+ std::vector<int32_t> axis_data{1};
+ Shape axis_shape{1};
+
+ std::vector<TypeParam> output_data{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8,
+ 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20};
+ std::vector<int32_t> output_shape{4, 3, 2};
+
+ Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
+ Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data);
+
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ Reverse kernel = Reverse(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "Utils.h"
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+const int max_dim = 4;
+
+Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
+ : Kernel({input, begin, size}, {output})
+{
+}
+
+template <typename T>
+Shape calculateOutputShape(const Tensor *input, const Tensor *begin, const Tensor *size)
+{
+ Shape output_shape = Shape(input->shape().num_dims());
+ for (int idx = 0; idx < input->shape().num_dims(); idx++)
+ {
+ T size_value = getTensorData<T>(size)[idx];
+ if (size_value < 0)
+ {
+ if (size_value != -1)
+ {
+ throw std::runtime_error("Invalid size.");
+ }
+ size_value = input->shape().dim(idx) - getTensorData<T>(begin)[idx];
+ }
+ else
+ {
+ if (input->shape().dim(idx) < getTensorData<T>(begin)[idx] + size_value)
+ {
+ throw std::runtime_error("Invalid begin and size.");
+ }
+ }
+ output_shape.dim(idx) = static_cast<int>(size_value);
+ }
+ return output_shape;
+}
+
+template <typename T>
+void getBeginAndSizeVectors(int dimensions, const Tensor *begin, const Tensor *size,
+ std::vector<int> *begins, std::vector<int> *sizes)
+{
+ for (int idx = dimensions - 1; idx >= 0; --idx)
+ {
+ begins->push_back(getTensorData<T>(begin)[idx]);
+ sizes->push_back(getTensorData<T>(size)[idx]);
+ }
+}
+
+void Slice::configure()
+{
+ assert(input()->element_type() == output()->element_type());
+ assert(begin()->element_type() == DataType::S32 || begin()->element_type() == DataType::S64);
+ assert(size()->element_type() == DataType::S32 || size()->element_type() == DataType::S64);
+ assert(begin()->shape().num_dims() == 1);
+ assert(size()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() <= max_dim);
+
+ if (begin()->element_type() == DataType::S32)
+ {
+ output()->resize(calculateOutputShape<int32_t>(input(), begin(), size()));
+ }
+ else if (begin()->element_type() == DataType::S64)
+ {
+ output()->resize(calculateOutputShape<int64_t>(input(), begin(), size()));
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Slice::execute() const
+{
+ std::vector<int> begins;
+ begins.reserve(max_dim);
+ std::vector<int> sizes;
+ sizes.reserve(max_dim);
+ if (begin()->element_type() == DataType::S32)
+ {
+ getBeginAndSizeVectors<int32_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+ }
+ else if (begin()->element_type() == DataType::S64)
+ {
+ getBeginAndSizeVectors<int64_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported begin type.");
+ }
+ for (int i = input()->shape().num_dims(); i < max_dim; ++i)
+ {
+ begins.push_back(0);
+ sizes.push_back(1);
+ }
+
+ assert(begins.size() == 4);
+ assert(sizes.size() == 4);
+ tflite::SliceParams op_params{};
+ op_params.begin_count = 4;
+ op_params.size_count = 4;
+ for (int i = 0; i < 4; i++)
+ {
+ op_params.begin[i] = begins[3 - i];
+ op_params.size[i] = sizes[3 - i];
+ }
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::optimized_ops::Slice(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::optimized_ops::Slice(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported input type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SLICE_H
+#define LUCI_INTERPRETER_KERNELS_SLICE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Slice : public Kernel
+{
+public:
+ Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *begin() const { return _inputs[1]; }
+ const Tensor *size() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SLICE_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class SliceTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(SliceTest, DataTypes);
+
+TYPED_TEST(SliceTest, SimpleTest)
+{
+ std::vector<TypeParam> input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6};
+ Shape input_shape{3, 2, 3, 1};
+ std::vector<int32_t> begin_data{1, 0, 0, 0};
+ Shape begin_shape{4};
+ std::vector<int32_t> size_data{2, 1, -1, 1};
+ Shape size_shape{4};
+ std::vector<TypeParam> output_data{3, 3, 3, 5, 5, 5};
+ std::vector<int32_t> output_shape{2, 1, 3, 1};
+
+ Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
+ Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data);
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ Slice kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
/*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
/*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
getElementType<float>());
+
+ SUCCEED();
}
TEST(TransposeConvTest, FloatTwoFiltersTest)
3352, 3652, 2760},
/*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
getElementType<float>());
-}
-TEST(TransposeConvTest, Uint8Simple)
-{
- // TODO
- // Implement GetDequantizedOutput Function.
- // Create Test for Uint8 Case
-}
-TEST(TransposeConvTest, Uint8FiltersTest)
-{
- // TODO
- // Implement GetDequantizedOutput Function.
- // Create Test for Uint8 Case
+ SUCCEED();
}
+// TODO Uint8Simple
+// Implement GetDequantizedOutput Function.
+// Create Test for Uint8 Case
+
+// TODO Uint8FiltersTest
+// Implement GetDequantizedOutput Function.
+// Create Test for Uint8 Case
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
+nnas_find_package(GTest REQUIRED)
+
set(SOURCES
GraphLoader.h
GraphLoader.cpp
target_link_libraries(luci_interpreter_loader
PUBLIC luci_lang luci_interpreter_core
PRIVATE luci_interpreter_kernels nncc_common)
+
+set(TEST_SOURCES KernelBuilder.test.cpp)
+
+GTest_AddTest(luci_interpreter_loader_test ${TEST_SOURCES})
+target_link_libraries(luci_interpreter_loader_test luci_interpreter_loader)
#include "loader/GraphLoader.h"
-#include "loader/ModuleLoader.h"
#include "loader/KernelBuilder.h"
#include <loco/IR/Algorithm.h>
switch (node->opcode())
{
// These nodes denote inputs / outputs of a graph.
- case luci::CircleOpcode::CONST:
+ case luci::CircleOpcode::CIRCLECONST:
case luci::CircleOpcode::CIRCLEINPUT:
case luci::CircleOpcode::CIRCLEOUTPUT:
+ case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
// The following nodes denote outputs of multiple-output nodes.
case luci::CircleOpcode::CIRCLEIFOUT:
case luci::CircleOpcode::CIRCLESPLITOUT:
} // namespace
-GraphLoader::GraphLoader(const ModuleLoader &module_loader, const loco::Graph *graph,
- RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _module_loader(module_loader), _graph(graph), _runtime_graph(runtime_graph),
- _runtime_to_ir(runtime_to_ir), _node_to_tensor(node_to_tensor)
+GraphLoader::GraphLoader(
+ const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
+ _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
{
}
const luci::CircleQuantParam *params = node->quantparam();
quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
+ quantization.quantized_dimension = params->quantized_dimension;
}
auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
void GraphLoader::loadOperators()
{
- KernelBuilder kernel_builder(_module_loader, *this);
+ KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);
// Create kernels for executable nodes. This has to be done in execution order.
for (const loco::Node *loco_node :
}
}
-void GraphLoader::load()
-{
- loadTensors();
- initInputOutputTensors();
- loadOperators();
-}
-
} // namespace luci_interpreter
namespace luci_interpreter
{
-class ModuleLoader;
-
class GraphLoader
{
public:
- GraphLoader(const ModuleLoader &module_loader, const loco::Graph *graph,
- RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
- void load();
-
- Tensor *getTensorForNode(const loco::Node *node) const { return _node_to_tensor.at(node); }
-
-private:
- void loadOperators();
- void initInputOutputTensors() const;
void loadTensors();
+ void initInputOutputTensors() const;
+ void loadOperators();
- const ModuleLoader &_module_loader;
+private:
const loco::Graph *_graph;
RuntimeGraph *_runtime_graph;
RuntimeToIR &_runtime_to_ir;
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
};
#include "kernels/AveragePool2D.h"
#include "kernels/Concatenation.h"
#include "kernels/Conv2D.h"
+#include "kernels/DepthToSpace.h"
#include "kernels/DepthwiseConv2D.h"
#include "kernels/Elu.h"
#include "kernels/FullyConnected.h"
#include "kernels/Mul.h"
#include "kernels/Pad.h"
#include "kernels/Reshape.h"
+#include "kernels/Reverse.h"
+#include "kernels/Slice.h"
#include "kernels/Softmax.h"
#include "kernels/SpaceToDepth.h"
#include "kernels/Split.h"
#include "kernels/Unpack.h"
#include "kernels/Transpose.h"
#include "kernels/TransposeConv.h"
-#include "loader/GraphLoader.h"
-#include "loader/ModuleLoader.h"
#include <stdexcept>
const Tensor *KernelBuilder::getInputTensor(const loco::Node *node) const
{
- const Tensor *tensor = _graph_loader.getTensorForNode(node);
+ const Tensor *tensor = _node_to_tensor.at(node);
assert(tensor != nullptr);
return tensor;
}
const Tensor *KernelBuilder::getOptionalInputTensor(const loco::Node *node) const
{
- // TODO Revise this when optional inputs are implemented in the IR.
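+ // An optional input that is omitted in the model is represented by a CircleOutputExclude node.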
+ if (dynamic_cast<const luci::CircleOutputExclude *>(node))
+ {
+ return nullptr;
+ }
return getInputTensor(node);
}
Tensor *KernelBuilder::getOutputTensor(const loco::Node *node) const
{
- Tensor *tensor = _graph_loader.getTensorForNode(node);
+ Tensor *tensor = _node_to_tensor.at(node);
assert(tensor != nullptr);
return tensor;
}
RuntimeGraph *KernelBuilder::getRuntimeGraph(const loco::Graph *graph) const
{
- RuntimeGraph *runtime_graph = _module_loader.getRuntimeGraph(graph);
+ RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
assert(runtime_graph != nullptr);
return runtime_graph;
}
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleArgMax *node)
{
assert(node->arity() == 2);
- const Tensor *input1 = getInputTensor(node->input());
- const Tensor *input2 = getInputTensor(node->dimension());
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *axis = getInputTensor(node->dimension());
Tensor *output = getOutputTensor(node);
ArgMaxParams params{};
params.output_type = node->output_type();
- return std::make_unique<kernels::ArgMax>(input1, input2, output, params);
+ return std::make_unique<kernels::ArgMax>(input, axis, output, params);
}
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleAveragePool2D *node)
return std::make_unique<kernels::Conv2D>(input, filter, bias, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleDepthToSpace *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->input());
+ Tensor *output = getOutputTensor(node);
+
+ DepthToSpaceParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::DepthToSpace>(input, output, params);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleDepthwiseConv2D *node)
{
assert(node->arity() == 3);
assert(node->arity() == 3);
const Tensor *input = getInputTensor(node->input());
- const Tensor *filter = getInputTensor(node->weights());
+ const Tensor *weights = getInputTensor(node->weights());
const Tensor *bias = getOptionalInputTensor(node->bias());
Tensor *output = getOutputTensor(node);
FullyConnectedParams params{};
params.activation = node->fusedActivationFunction();
- return std::make_unique<kernels::FullyConnected>(input, filter, bias, output, params);
+ return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
}
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleIf *node)
else_graph);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleInput *)
+{
+ throw std::runtime_error("Input node cannot be executed.");
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleL2Normalize *node)
{
assert(node->arity() == 1);
return std::make_unique<kernels::Logistic>(input, output);
}
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleInput *)
-{
- throw std::runtime_error("Input node cannot be executed.");
-}
-
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleMaxPool2D *node)
{
assert(node->arity() == 1);
return std::make_unique<kernels::Reshape>(input, shape, output);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleReverseV2 *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input = getInputTensor(node->tensor());
+ const Tensor *axes = getInputTensor(node->axis());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Reverse>(input, axes, output);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSlice *node)
+{
+ assert(node->arity() == 3);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *begin = getInputTensor(node->begin());
+ const Tensor *size = getInputTensor(node->size());
+
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Slice>(input, begin, size, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSoftmax *node)
{
assert(node->arity() == 1);
return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->input());
+ Tensor *output = getOutputTensor(node);
+
+ SqueezeParams params{};
+ params.squeeze_dims = node->squeeze_dims();
+
+ return std::make_unique<kernels::Squeeze>(input, output, params);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleStridedSlice *node)
{
assert(node->arity() == 4);
return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
}
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
{
- assert(node->arity() == 1);
+ assert(node->arity() == 2);
- const Tensor *input = getInputTensor(node->input());
+ const Tensor *input = getInputTensor(node->a());
+ const Tensor *perm = getInputTensor(node->perm());
Tensor *output = getOutputTensor(node);
- SqueezeParams params{};
- assert(node->squeeze_dims().size() <= 4);
- for (size_t i = 0; i < node->squeeze_dims().size(); i++)
- {
- params.squeeze_dims.push_back(node->squeeze_dims().at(i));
- }
-
- return std::make_unique<kernels::Squeeze>(input, output, params);
+ return std::make_unique<kernels::Transpose>(input, perm, output);
}
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *node)
return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
}
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->a());
- const Tensor *perm = getInputTensor(node->perm());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Transpose>(input, perm, output);
-}
-
} // namespace luci_interpreter
#include <memory>
#include <vector>
+#include <unordered_map>
namespace luci_interpreter
{
-class GraphLoader;
-class ModuleLoader;
-
class KernelBuilder : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>
{
public:
- KernelBuilder(const ModuleLoader &module_loader, const GraphLoader &graph_loader)
- : _module_loader(module_loader), _graph_loader(graph_loader)
+ KernelBuilder(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
{
}
std::unique_ptr<Kernel> visit(const luci::CircleConcatenation *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleConv2D *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleConst *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleDepthToSpace *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleDepthwiseConv2D *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleElu *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleFullyConnected *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleOutput *node) override;
std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleReshape *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleReverseV2 *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSlice *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override;
RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
private:
- const ModuleLoader &_module_loader;
- const GraphLoader &_graph_loader;
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
+ const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
};
} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+#include "loader/KernelBuilder.h"
+
+#include <kernels/Add.h>
+#include <kernels/ArgMax.h>
+#include <kernels/AveragePool2D.h>
+#include <kernels/Concatenation.h>
+#include <kernels/Conv2D.h>
+#include <kernels/DepthToSpace.h>
+#include <kernels/DepthwiseConv2D.h>
+#include <kernels/Elu.h>
+#include <kernels/FullyConnected.h>
+#include <kernels/L2Normalize.h>
+#include <kernels/L2Pool2D.h>
+#include <kernels/LeakyRelu.h>
+#include <kernels/LocalResponseNormalization.h>
+#include <kernels/Logistic.h>
+#include <kernels/MaxPool2D.h>
+#include <kernels/Mean.h>
+#include <kernels/Mul.h>
+#include <kernels/Pad.h>
+#include <kernels/Reshape.h>
+#include <kernels/Reverse.h>
+#include <kernels/Slice.h>
+#include <kernels/Softmax.h>
+#include <kernels/SpaceToDepth.h>
+#include <kernels/Split.h>
+#include <kernels/Squeeze.h>
+#include <kernels/StridedSlice.h>
+#include <kernels/Transpose.h>
+#include <kernels/TransposeConv.h>
+#include <kernels/Unpack.h>
+
+#include <gmock/gmock.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class KernelBuilderTest : public Test
+{
+protected:
+ luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); }
+
+ template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args)
+ {
+ auto *node = _graph.nodes()->create<NodeT>(std::forward<Args>(args)...);
+ // The actual type does not matter for the purpose of the tests.
+ // NOTE The type is meaningless for nodes with multiple outputs (corresponding *Out nodes carry
+ // actual output types).
+ node->dtype(loco::DataType::FLOAT32);
+ return node;
+ }
+
+ template <typename NodeOutT> NodeOutT *createNodeOut(loco::Node *node, int index)
+ {
+ auto *node_out = createNode<NodeOutT>();
+ node_out->input(node);
+ node_out->index(index);
+ return node_out;
+ }
+
+ template <typename KernelT> std::unique_ptr<KernelT> buildKernel(const luci::CircleNode *op)
+ {
+ std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
+
+ RuntimeGraph runtime_graph(nullptr);
+ RuntimeToIR runtime_to_ir;
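+ // loadTensors() fills _node_to_tensor, which KernelBuilder uses to resolve the
+ // kernel's input/output tensors for the node under test.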
+ GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph,
+ _node_to_tensor);
+ graph_loader.loadTensors();
+
+ KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor);
+
+ auto kernel = op->accept(&kernel_builder);
+ return std::unique_ptr<KernelT>(dynamic_cast<KernelT *>(kernel.release()));
+ }
+
+ void checkTensor(const Tensor *tensor, const loco::Node *node)
+ {
+ EXPECT_THAT(tensor, Eq(_node_to_tensor.at(node)));
+ }
+
+private:
+ loco::Graph _graph;
+ std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
+};
+
+TEST_F(KernelBuilderTest, Add)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleAdd>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Add>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, ArgMax)
+{
+ auto *input = createInputNode();
+ auto *axis = createInputNode();
+
+ auto *op = createNode<luci::CircleArgMax>();
+ op->input(input);
+ op->dimension(axis);
+
+ op->output_type(loco::DataType::FLOAT32);
+
+ auto kernel = buildKernel<kernels::ArgMax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().output_type, Eq(op->output_type()));
+}
+
+TEST_F(KernelBuilderTest, AveragePool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleAveragePool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::AveragePool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Concatenation)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleConcatenation>(2);
+ op->values(0, input1);
+ op->values(1, input2);
+ op->axis(11);
+
+ auto kernel = buildKernel<kernels::Concatenation>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(0), input1);
+ checkTensor(kernel->input(1), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
+TEST_F(KernelBuilderTest, Conv2D)
+{
+ auto *input = createInputNode();
+ auto *filter = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleConv2D>();
+ op->input(input);
+ op->filter(filter);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->stride()->h(11);
+ op->stride()->w(13);
+ op->dilation()->h(17);
+ op->dilation()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Conv2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
+ EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, DepthToSpace)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleDepthToSpace>();
+ op->input(input);
+
+ op->block_size(11);
+
+ auto kernel = buildKernel<kernels::DepthToSpace>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
+}
+
+TEST_F(KernelBuilderTest, DepthwiseConv2D)
+{
+ auto *input = createInputNode();
+ auto *filter = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleDepthwiseConv2D>();
+ op->input(input);
+ op->filter(filter);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->depthMultiplier(11);
+ op->stride()->h(13);
+ op->stride()->w(17);
+ op->dilation()->h(19);
+ op->dilation()->w(23);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::DepthwiseConv2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().depth_multiplier, Eq(op->depthMultiplier()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
+ EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Elu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleElu>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Elu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, FullyConnected)
+{
+ auto *input = createInputNode();
+ auto *weights = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleFullyConnected>();
+ op->input(input);
+ op->weights(weights);
+ op->bias(bias);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::FullyConnected>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->weights(), weights);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, L2Normalize)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleL2Normalize>();
+ op->x(input);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::L2Normalize>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, L2Pool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleL2Pool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::L2Pool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, LeakyRelu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLeakyRelu>();
+ op->features(input);
+
+ op->alpha(11.0f);
+
+ auto kernel = buildKernel<kernels::LeakyRelu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
+}
+
+TEST_F(KernelBuilderTest, LocalResponseNormalization)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLocalResponseNormalization>();
+ op->input(input);
+
+ op->radius(11);
+ op->bias(13.0f);
+ op->alpha(15.0f);
+ op->beta(17.0f);
+
+ auto kernel = buildKernel<kernels::LocalResponseNormalization>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().radius, Eq(op->radius()));
+ EXPECT_THAT(kernel->params().bias, Eq(op->bias()));
+ EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
+ EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
+}
+
+TEST_F(KernelBuilderTest, Logistic)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogistic>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Logistic>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, MaxPool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleMaxPool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::MaxPool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Mean)
+{
+ auto *input = createInputNode();
+ auto *axes = createInputNode();
+
+ auto *op = createNode<luci::CircleMean>();
+ op->input(input);
+ op->reduction_indices(axes);
+
+ op->keep_dims(true);
+
+ auto kernel = buildKernel<kernels::Mean>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axes(), axes);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().keep_dims, Eq(op->keep_dims()));
+}
+
+TEST_F(KernelBuilderTest, Mul)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMul>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Mul>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Pad)
+{
+ auto *input = createInputNode();
+ auto *paddings = createInputNode();
+
+ auto *op = createNode<luci::CirclePad>();
+ op->input(input);
+ op->paddings(paddings);
+
+ auto kernel = buildKernel<kernels::Pad>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->paddings(), paddings);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Reshape)
+{
+ auto *input = createInputNode();
+ auto *shape = createInputNode();
+
+ auto *op = createNode<luci::CircleReshape>();
+ op->tensor(input);
+ op->shape(shape);
+
+ auto kernel = buildKernel<kernels::Reshape>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->shape(), shape);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, ReverseV2)
+{
+ auto *input = createInputNode();
+ auto *axes = createInputNode();
+
+ auto *op = createNode<luci::CircleReverseV2>();
+ op->tensor(input);
+ op->axis(axes);
+
+ auto kernel = buildKernel<kernels::Reverse>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axes(), axes);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Slice)
+{
+ auto *input = createInputNode();
+ auto *begin = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleSlice>();
+ op->input(input);
+ op->begin(begin);
+ op->size(size);
+
+ auto kernel = buildKernel<kernels::Slice>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->begin(), begin);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Softmax)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSoftmax>();
+ op->logits(input);
+
+ op->beta(11.0f);
+
+ auto kernel = buildKernel<kernels::Softmax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
+}
+
+TEST_F(KernelBuilderTest, SpaceToDepth)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSpaceToDepth>();
+ op->input(input);
+
+ op->block_size(11);
+
+ auto kernel = buildKernel<kernels::SpaceToDepth>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
+}
+
+TEST_F(KernelBuilderTest, Split)
+{
+ auto *axis = createInputNode();
+ auto *input = createInputNode();
+ auto *op = createNode<luci::CircleSplit>();
+ auto *output1 = createNodeOut<luci::CircleSplitOut>(op, 0);
+ auto *output2 = createNodeOut<luci::CircleSplitOut>(op, 1);
+
+ op->split_dim(axis);
+ op->input(input);
+
+ op->num_split(2);
+
+ auto kernel = buildKernel<kernels::Split>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(0), output1);
+ checkTensor(kernel->output(1), output2);
+}
+
+TEST_F(KernelBuilderTest, Squeeze)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSqueeze>();
+ op->input(input);
+
+ op->squeeze_dims({11, 13});
+
+ auto kernel = buildKernel<kernels::Squeeze>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().squeeze_dims, ElementsAreArray(op->squeeze_dims()));
+}
+
+TEST_F(KernelBuilderTest, StridedSlice)
+{
+ auto *input = createInputNode();
+ auto *begin = createInputNode();
+ auto *end = createInputNode();
+ auto *strides = createInputNode();
+
+ auto *op = createNode<luci::CircleStridedSlice>();
+ op->input(input);
+ op->begin(begin);
+ op->end(end);
+ op->strides(strides);
+
+ op->begin_mask(11);
+ op->ellipsis_mask(13);
+ op->end_mask(17);
+ op->new_axis_mask(19);
+ op->shrink_axis_mask(23);
+
+ auto kernel = buildKernel<kernels::StridedSlice>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->begin(), begin);
+ checkTensor(kernel->end(), end);
+ checkTensor(kernel->strides(), strides);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().begin_mask, Eq(op->begin_mask()));
+ EXPECT_THAT(kernel->params().ellipsis_mask, Eq(op->ellipsis_mask()));
+ EXPECT_THAT(kernel->params().end_mask, Eq(op->end_mask()));
+ EXPECT_THAT(kernel->params().new_axis_mask, Eq(op->new_axis_mask()));
+ EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
+}
+
+TEST_F(KernelBuilderTest, Transpose)
+{
+ auto *input = createInputNode();
+ auto *perm = createInputNode();
+
+ auto *op = createNode<luci::CircleTranspose>();
+ op->a(input);
+ op->perm(perm);
+
+ auto kernel = buildKernel<kernels::Transpose>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->perm(), perm);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, TransposeConv)
+{
+ auto *output_shape = createInputNode();
+ auto *filter = createInputNode();
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleTransposeConv>();
+ op->inputSizes(output_shape);
+ op->filter(filter);
+ op->outBackprop(input);
+
+ op->padding(luci::Padding::SAME);
+ op->stride()->h(11);
+ op->stride()->w(13);
+
+ auto kernel = buildKernel<kernels::TransposeConv>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->output_shape(), output_shape);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+}
+
+TEST_F(KernelBuilderTest, Unpack)
+{
+ auto *input = createInputNode();
+ auto *op = createNode<luci::CircleUnpack>();
+ auto *output1 = createNodeOut<luci::CircleUnpackOut>(op, 0);
+ auto *output2 = createNodeOut<luci::CircleUnpackOut>(op, 1);
+
+ op->value(input);
+
+ op->num(2);
+ op->axis(11);
+
+ auto kernel = buildKernel<kernels::Unpack>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(0), output1);
+ checkTensor(kernel->output(1), output2);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
+TEST_F(KernelBuilderTest, NonExisting1_NEG)
+{
+ auto *op = createNode<luci::CircleConst>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+TEST_F(KernelBuilderTest, NonExisting2_NEG)
+{
+ auto *op = createNode<luci::CircleInput>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+TEST_F(KernelBuilderTest, NonExisting3_NEG)
+{
+ auto *op = createNode<luci::CircleOutput>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+} // namespace
+} // namespace luci_interpreter
{
const loco::Graph *graph = _module->graph(i);
RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
- GraphLoader loader(*this, graph, runtime_graph, _runtime_to_ir, _node_to_tensor);
- loader.load();
+ GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph,
+ _node_to_tensor);
+ loader.loadTensors();
+ loader.initInputOutputTensors();
+ loader.loadOperators();
}
}
void load();
- RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const
- {
- return _graph_to_runtime_graph.at(graph);
- }
-
private:
const luci::Module *_module;
RuntimeModule *_runtime_module;
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
"${CMAKE_CURRENT_BINARY_DIR}"
"${ARTIFACTS_BIN_PATH}"
- "${NNCC_OVERLAY_DIR}/venv_1_13_2"
+ "${NNCC_OVERLAY_DIR}/venv_2_3_0"
${LUCI_VALUE_TESTS}
)
#
# HOW TO USE
#
-# ./evalverify.sh <path/to/work_dir> <TEST 1> <TEST 2> ...
-# work_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <TEST 1> <TEST 2> ...
+# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# work_dir : artifacts directory where test materials exist
+# venv_dir : python virtual environment home directory
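+#
+# (ex: ./evalverify.sh <bin_dir> <artifacts_dir> <venv_dir> Add_000 Mul_000)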
VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier.py"
input_data = np.array(
np.random.randint(0, 256, size=input_details["shape"]),
input_details["dtype"])
+ elif input_details["dtype"] == np.bool_:
+ input_data = np.array(
+ np.random.choice(a=[True, False], size=input_details["shape"]),
+ input_details["dtype"])
else:
raise SystemExit("Unsupported input dtype")
# Do inference
interpreter.invoke()
-# Get reference output data.
-assert len(interpreter.get_output_details()) == 1 # TODO: Support multiple outputs
-output_details = interpreter.get_output_details()[0]
-ref_output_data = interpreter.get_tensor(output_details["index"])
-
# Execute luci interpreter.
subprocess.run(
[
str(num_inputs), circle_model + ".input", circle_model + ".output"
],
check=True)
-output_data = np.fromfile(circle_model + ".output", output_details["dtype"])
-shape_file = open(circle_model + ".output.shape", 'r')
-output_shape = [int(i) for i in shape_file.read().split(',')]
-shape_file.close()
-luci_output_data = np.reshape(output_data, output_shape)
# Compare the results.
-try:
- if output_details["dtype"] == np.uint8:
- if np.allclose(luci_output_data, ref_output_data, rtol=0, atol=0) == False:
- raise SystemExit("Execution result of " + tflite_model +
- " does not match with " + circle_model)
- elif output_details["dtype"] == np.float32:
- if np.allclose(
- luci_output_data, ref_output_data, rtol=1.e-5, atol=1.e-5) == False:
- raise SystemExit("Execution result of " + tflite_model +
- " does not match with " + circle_model)
- else:
- raise SystemExit("Unsupported data type: ", output_details["dtype"])
-except:
- print(traceback.format_exc())
- quit(255)
+for idx in range(len(interpreter.get_output_details())):
+ output_details = interpreter.get_output_details()[idx]
+ output_data = np.fromfile(circle_model + ".output" + str(idx),
+ output_details["dtype"])
+ shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+ shape_file.close()
+ luci_output_data = np.reshape(output_data, output_shape)
+ ref_output_data = interpreter.get_tensor(output_details["index"])
+ # Integer outputs must match exactly; float outputs are compared with a small tolerance.
+ try:
+ if output_details["dtype"] == np.uint8:
+ if np.allclose(luci_output_data, ref_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.float32:
+ if np.allclose(
+ luci_output_data, ref_output_data, rtol=1.e-5, atol=1.e-5) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int64:
+ if np.allclose(luci_output_data, ref_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int32:
+ if np.allclose(luci_output_data, ref_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ else:
+ raise SystemExit("Unsupported data type: ", output_details["dtype"])
+ except:
+ print(traceback.format_exc())
+ quit(255)
quit(0)
#addeval(Abs_000)
addeval(Add_000)
+#addeval(Add_001)
addeval(Add_U8_000)
-#addeval(ArgMax_000)
-#addeval(ArgMax_001)
-#addeval(ArgMax_002)
-#addeval(ArgMax_003)
-#addeval(ArgMax_U8_000)
-#addeval(ArgMax_U8_001)
-#addeval(ArgMax_U8_002)
-#addeval(ArgMax_U8_003)
+#addeval(AddN_000)
+addeval(ArgMax_000)
+addeval(ArgMax_001)
+addeval(ArgMax_002)
+addeval(ArgMax_003)
+addeval(ArgMax_U8_000)
+addeval(ArgMax_U8_001)
+addeval(ArgMax_U8_002)
+addeval(ArgMax_U8_003)
+#addeval(ArgMin_000)
+#addeval(ArgMin_001)
+#addeval(ArgMin_002)
+#addeval(ArgMin_003)
+#addeval(ArgMin_U8_000)
+#addeval(ArgMin_U8_001)
+#addeval(ArgMin_U8_002)
+#addeval(ArgMin_U8_003)
addeval(AveragePool2D_000)
+#addeval(BatchMatMul_000)
#addeval(BatchMatMulV2_000)
#addeval(BatchMatMulV2_001)
#addeval(BatchToSpaceND_000)
#addeval(Cast_000)
+#addeval(Cast_001)
+#addeval(Ceil_000)
addeval(Concatenation_000)
addeval(Concatenation_U8_000)
addeval(Conv2D_000)
addeval(Conv2D_001)
addeval(Conv2D_002)
+#addeval(Conv2D_003)
addeval(Conv2D_U8_000)
addeval(Conv2D_U8_001)
#addeval(Cos_000)
+#addeval(DepthToSpace_000)
addeval(DepthwiseConv2D_000)
addeval(DepthwiseConv2D_U8_000)
+#addeval(DepthwiseConv2D_U8_001)
+addeval(DepthwiseConv2D_001)
#addeval(Div_000)
+addeval(ELU_000)
#addeval(Equal_000)
#addeval(Exp_000)
+#addeval(ExpandDims_000)
+#addeval(ExpandDims_001)
+#addeval(ExpandDims_002)
+#addeval(ExpandDims_003)
+#addeval(Fill_000)
+#addeval(Fill_001)
+#addeval(Floor_000)
+#addeval(FloorDiv_000)
+#addeval(FloorDiv_001)
+#addeval(FloorMod_000)
+#addeval(FloorMod_001)
addeval(FullyConnected_000)
addeval(FullyConnected_001)
-#addeval(FullyConnected_002)
+addeval(FullyConnected_002)
#addeval(FullyConnected_U8_000)
#addeval(Gather_000)
-#addeval(If_000)
-#addeval(If_001)
+#addeval(GatherNd_000)
+#addeval(Greater_000)
+#addeval(GreaterEqual_000)
+addeval(If_000)
+addeval(If_001)
+addeval(L2Normalize_000)
+addeval(L2Pool2D_000)
+#addeval(L2Pool2D_U8_000)
+addeval(LeakyRelu_000)
+#addeval(Less_000)
+#addeval(LessEqual_000)
+addeval(LocalResponseNormalization_000)
+#addeval(Log_000)
+#addeval(LogicalAnd_000)
#addeval(LogicalNot_000)
#addeval(LogicalOr_000)
-#addeval(Logistic_000)
+addeval(Logistic_000)
+#addeval(LogSoftmax_000)
+#addeval(MatMul_000)
+#addeval(MatrixDiag_000)
+#addeval(MatrixSetDiag_000)
+#addeval(Maximum_000)
addeval(MaxPool2D_000)
addeval(MaxPool2D_U8_000)
addeval(Mean_000)
addeval(Mean_001)
-addeval(Mean_U8_000)
+#addeval(Mean_U8_000)
+#addeval(Minimum_000)
+#addeval(MirrorPad_000)
addeval(Mul_000)
#addeval(Mul_U8_000)
+#addeval(Neg_000)
+#addeval(NotEqual_000)
+#addeval(OneHot_000)
+#addeval(OneHot_001)
+#addeval(OneHot_002)
+#addeval(OneHot_003)
#addeval(Pack_000)
#addeval(Pack_U8_000)
addeval(Pad_000)
addeval(Pad_U8_000)
+#addeval(Pow_000)
+#addeval(PRelu_000)
+#addeval(Range_000)
+#addeval(Rank_000)
+#addeval(ReduceAny_000)
+#addeval(ReduceAny_001)
+#addeval(ReduceAny_002)
+#addeval(ReduceAny_003)
+#addeval(ReduceMax_000)
+#addeval(ReduceMin_000)
#addeval(ReduceProd_000)
#addeval(ReduceProd_001)
#addeval(ReduceProd_002)
#addeval(ReduceProd_003)
#addeval(ReLU_000)
+#addeval(ReLU6_000)
+#addeval(ReLUN1To1_000)
addeval(Reshape_000)
addeval(Reshape_001)
addeval(Reshape_002)
#addeval(Reshape_003)
addeval(Reshape_U8_000)
+#addeval(ResizeBilinear_000)
+#addeval(ResizeNearestNeighbor_000)
+#addeval(ReverseSequence_000)
+#addeval(ReverseV2_000)
+#addeval(Round_000)
#addeval(Rsqrt_000)
+#addeval(ScatterNd_000)
+#addeval(SegmentSum_000)
+#addeval(Select_000)
+#addeval(Select_001)
+#addeval(Select_002)
+#addeval(SelectV2_000)
+#addeval(SelectV2_001)
+#addeval(SelectV2_002)
+#addeval(Shape_000)
#addeval(Sin_000)
+addeval(Slice_000)
addeval(Softmax_000)
#addeval(Softmax_U8_000)
#addeval(SpaceToBatchND_000)
#addeval(SpaceToBatchND_001)
#addeval(SpaceToBatchND_002)
#addeval(SpaceToBatchND_003)
-#addeval(StridedSlice_000)
-#addeval(StridedSlice_001)
+addeval(SpaceToDepth_000)
+#addeval(SparseToDense_000)
+addeval(Split_000)
+#addeval(SplitV_000)
+#addeval(Sqrt_000)
+#addeval(Square_000)
+#addeval(SquaredDifference_000)
+addeval(Squeeze_000)
+addeval(StridedSlice_000)
+addeval(StridedSlice_001)
+addeval(StridedSlice_002)
#addeval(Sub_000)
#addeval(Sub_U8_000)
+#addeval(Sum_000)
+#addeval(Sum_001)
#addeval(Tanh_000)
#addeval(Tile_000)
#addeval(Tile_U8_000)
-#addeval(Transpose_000)
-#addeval(Unpack_000)
-#addeval(Unpack_001)
-#addeval(Unpack_002)
+#addeval(TopKV2_000)
+#addeval(TopKV2_001)
+addeval(Transpose_000)
+#addeval(TransposeConv_000)
+addeval(Unpack_000)
+addeval(Unpack_001)
+addeval(Unpack_002)
+addeval(Unpack_003)
+#addeval(Where_000)
+#addeval(Where_001)
#addeval(While_000)
#addeval(While_001)
+#addeval(While_002)
+#addeval(While_003)
+#addeval(YUV_TO_RGB_U8_000)
+#addeval(ZerosLike_000)
assert(num_inputs == input_nodes.size());
for (int32_t i = 0; i < num_inputs; i++)
{
- const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes[i]);
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
std::vector<char> input_data(getTensorSize(input_node));
readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(),
input_data.size());
// Get output.
const auto output_nodes = loco::output_nodes(module->graph());
- // TODO: Support multiple outputs
- assert(output_nodes.size() == 1);
- const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes[0]);
- std::vector<char> output_data(getTensorSize(output_node));
- interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
-
- // Output data is written in ${output_file}
- // (ex: Add.circle.output)
- // Output shape is written in ${output_file}.shape
- // (ex: Add.circle.output.shape)
- // TODO: Use HDF5 file format
- writeDataToFile(output_file, output_data.data(), output_data.size());
- auto shape_str = std::to_string(output_node->dim(0).value());
- for (int i = 1; i < output_node->rank(); i++)
+ for (int i = 0; i < module->graph()->outputs()->size(); i++)
{
- shape_str += ",";
- shape_str += std::to_string(output_node->dim(i).value());
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ std::vector<char> output_data(getTensorSize(output_node));
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+
+ // Output data is written in ${output_file}
+ // (ex: Add.circle.output0)
+ // Output shape is written in ${output_file}.shape
+ // (ex: Add.circle.output0.shape)
+ writeDataToFile(std::string(output_file) + std::to_string(i), output_data.data(),
+ output_data.size());
+ // An output tensor with rank 0 holds a scalar value; its shape is written as (1).
+ if (output_node->rank() == 0)
+ {
+ writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", "1", 1);
+ }
+ else
+ {
+ auto shape_str = std::to_string(output_node->dim(0).value());
+ for (int j = 1; j < output_node->rank(); j++)
+ {
+ shape_str += ",";
+ shape_str += std::to_string(output_node->dim(j).value());
+ }
+ writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", shape_str.c_str(),
+ shape_str.size());
+ }
}
- writeDataToFile(std::string(output_file) + ".shape", shape_str.c_str(), shape_str.size());
return EXIT_SUCCESS;
}
void visit(luci::CircleMirrorPad *) final;
void visit(luci::CircleMul *) final;
void visit(luci::CircleNeg *) final;
+ void visit(luci::CircleNonMaxSuppressionV4 *) final;
void visit(luci::CircleNotEqual *) final;
void visit(luci::CircleOneHot *) final;
void visit(luci::CirclePack *) final;
void visit(luci::CircleTopKV2 *) final;
void visit(luci::CircleTranspose *) final;
void visit(luci::CircleTransposeConv *) final;
+ void visit(luci::CircleUnique *) final;
void visit(luci::CircleUnpack *) final;
void visit(luci::CircleWhere *) final;
void visit(luci::CircleWhile *) final;
// Virtual for multiple-outputs
void visit(luci::CircleCustomOut *) final {}
void visit(luci::CircleIfOut *) final {}
+ void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
void visit(luci::CircleSplitOut *) final {}
void visit(luci::CircleSplitVOut *) final {}
void visit(luci::CircleTopKV2Out *) final {}
+ void visit(luci::CircleUniqueOut *) final {}
void visit(luci::CircleUnpackOut *) final {}
void visit(luci::CircleWhileOut *) final {}
{
export_simple(node, circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
circle::BuiltinOptions_LocalResponseNormalizationOptions,
- CreateLocalResponseNormalizationOptions(builder).Union());
+ CreateLocalResponseNormalizationOptions(builder, node->radius(), node->bias(),
+ node->alpha(), node->beta())
+ .Union());
}
void OperationExporter::visit(luci::CircleLog *node)
CreateNegOptions(builder).Union());
}
+void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node)
+{
+ auto nms_outs = loco::succs(node);
+ assert(nms_outs.size() == 2);
+
+ uint32_t op_idx =
+ md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4, node->op_version());
+ std::vector<int32_t> inputs_vec{
+ get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
+ get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+ get_tensor_index(node->score_threshold()),
+ };
+ std::vector<int32_t> outputs_vec;
+
+ for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : nms_outs)
+ {
+ auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
+ if (nms_out->index() == static_cast<int32_t>(idx))
+ {
+ outputs_vec.push_back(get_tensor_index(nms_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
+ }
+ }
+
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateNonMaxSuppressionV4Options(builder);
+ auto op_offset =
+ CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
void OperationExporter::visit(luci::CircleNotEqual *node)
{
export_simple(node, circle::BuiltinOperator_NOT_EQUAL, circle::BuiltinOptions_NotEqualOptions,
{
export_simple(node, circle::BuiltinOperator_SPACE_TO_DEPTH,
circle::BuiltinOptions_SpaceToDepthOptions,
- CreateSpaceToDepthOptions(builder).Union());
+ CreateSpaceToDepthOptions(builder, node->block_size()).Union());
}
void OperationExporter::visit(luci::CircleSparseToDense *node)
.Union());
}
+void OperationExporter::visit(luci::CircleUnique *node)
+{
+ auto unique_outs = loco::succs(node);
+ assert(int32_t(unique_outs.size()) == 2);
+ uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
+
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < 2; index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : unique_outs)
+ {
+ auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
+ if (unique_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(unique_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid Unique output");
+ }
+ }
+
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateUniqueOptions(builder, to_circle_tensortype(node->idx_out_type()));
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_UniqueOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
void OperationExporter::visit(luci::CircleUnpack *node)
{
LOGGER(l);
scale = builder.CreateVector(quantparam->scale);
zero_point = builder.CreateVector(quantparam->zerop);
}
- return circle::CreateQuantizationParameters(builder, min, max, scale, zero_point);
+ // Note: QuantizationDetails is not supported
+ return circle::CreateQuantizationParameters(builder, min, max, scale, zero_point,
+ circle::QuantizationDetails::QuantizationDetails_NONE,
+ 0, quantparam->quantized_dimension);
}
void exportOpDefinedTensor(const CircleTensoInfo &info, FlatBufferBuilder &builder,
#include "Nodes/CircleMirrorPad.h"
#include "Nodes/CircleMul.h"
#include "Nodes/CircleNeg.h"
+#include "Nodes/CircleNonMaxSuppressionV4.h"
#include "Nodes/CircleNotEqual.h"
#include "Nodes/CircleOneHot.h"
#include "Nodes/CirclePack.h"
#include "Nodes/CircleTopKV2.h"
#include "Nodes/CircleTranspose.h"
#include "Nodes/CircleTransposeConv.h"
+#include "Nodes/CircleUnique.h"
#include "Nodes/CircleUnpack.h"
#include "Nodes/CircleWhere.h"
#include "Nodes/CircleWhile.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
+#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+namespace luci
+{
+
+class CircleNonMaxSuppressionV4GraphBuilder : public GraphBuilderBase
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+ void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
+#define __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+namespace luci
+{
+
+class CircleUniqueGraphBuilder : public GraphBuilderBase
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+ void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
const auto &max = quantization->max;
const auto &scale = quantization->scale;
const auto &zero_point = quantization->zero_point;
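+  // quantized_dimension is the axis used for per-channel (per-axis) quantization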
+ const auto &quantized_dimension = quantization->quantized_dimension;
if ((!min.empty() && !max.empty()) || (!scale.empty() && !zero_point.empty()))
{
quantparam->max = max;
quantparam->scale = scale;
quantparam->zerop = zero_point;
+ quantparam->quantized_dimension = quantized_dimension;
return quantparam;
}
CIRCLE_NODE(MIRROR_PAD, CircleMirrorPadGraphBuilder); // 100
CIRCLE_NODE(MUL, CircleMulGraphBuilder); // 18
CIRCLE_NODE(NEG, CircleNegGraphBuilder); // 59
+  CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4GraphBuilder); // 120
CIRCLE_NODE(NOT_EQUAL, CircleNotEqualGraphBuilder); // 72
CIRCLE_NODE(ONE_HOT, CircleOneHotGraphBuilder); // 85
CIRCLE_NODE(PACK, CirclePackGraphBuilder); // 83
CIRCLE_NODE(TOPK_V2, CircleTopKV2GraphBuilder); // 48
CIRCLE_NODE(TRANSPOSE, CircleTransposeGraphBuilder); // 39
CIRCLE_NODE(TRANSPOSE_CONV, CircleTransposeConvGraphBuilder); // 67
+ CIRCLE_NODE(UNIQUE, CircleUniqueGraphBuilder); // 103
CIRCLE_NODE(UNPACK, CircleUnpackGraphBuilder); // 88
CIRCLE_NODE(WHERE, CircleWhereGraphBuilder); // 109
CIRCLE_NODE(WHILE, CircleWhileGraphBuilder); // 119
// BuiltinOperator_ARG_MAX = 56,
// BuiltinOperator_PADV2 = 60,
// BuiltinOperator_FAKE_QUANT = 80,
- // BuiltinOperator_UNIQUE = 103,
// BuiltinOperator_QUANTIZE = 114,
// BuiltinOperator_HARD_SWISH = 117,
- // BuiltinOperator_NON_MAX_SUPPRESSION_V4 = 120,
// BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121,
// BuiltinOperator_DENSIFY = 124,
}
#include <gtest/gtest.h>
-TEST(TensorFlowLiteImport, Dummy) { luci::Importer import; }
+TEST(TensorFlowLiteImport, Dummy)
+{
+ luci::Importer import;
+
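+  // SUCCEED() records an explicit success for this construction smoke test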
+ SUCCEED();
+}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleAbs>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleAdd>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
const auto *options = op.builtin_options.AsAddOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleArgMax>();
- node->input(inputs[0]);
- node->dimension(inputs[1]);
+ node->input(inputs.at(0));
+ node->dimension(inputs.at(1));
const auto *options = op.builtin_options.AsArgMaxOptions();
node->output_type(luci_datatype(options->output_type));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleArgMin>();
- node->input(inputs[0]);
- node->dimension(inputs[1]);
+ node->input(inputs.at(0));
+ node->dimension(inputs.at(1));
const auto *options = op.builtin_options.AsArgMinOptions();
node->output_type(luci_datatype(options->output_type));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleAveragePool2D>();
- node->value(inputs[0]);
+ node->value(inputs.at(0));
const auto *options = op.builtin_options.AsPool2DOptions();
node->padding(luci_padding(options->padding));
{
auto *node = graph->nodes()->create<CircleBCQFullyConnected>();
- node->input(inputs[0]);
- node->weights_scales(inputs[1]);
- node->weights_binary(inputs[2]);
- node->bias(inputs[3]);
- node->weights_clusters(inputs[4]);
+ node->input(inputs.at(0));
+ node->weights_scales(inputs.at(1));
+ node->weights_binary(inputs.at(2));
+ node->bias(inputs.at(3));
+ node->weights_clusters(inputs.at(4));
// TODO Find and move to appropriate place for setting optional input
if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
{
auto *node = graph->nodes()->create<CircleBCQGather>();
- node->input_scales(inputs[0]);
- node->input_binary(inputs[1]);
- node->indices(inputs[2]);
- node->input_clusters(inputs[3]);
+ node->input_scales(inputs.at(0));
+ node->input_binary(inputs.at(1));
+ node->indices(inputs.at(2));
+ node->input_clusters(inputs.at(3));
const auto *options = op.builtin_options.AsBCQGatherOptions();
node->input_hidden_size(options->input_hidden_size);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleBatchMatMul>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
const auto *options = op.builtin_options.AsBatchMatMulOptions();
node->adj_x(options->adjoint_lhs);
// input 1 and 2 should have INT32/INT64 type
const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs[1]);
+ const auto &tensor_1 = tensors.at(inputs.at(1));
switch (tensor_1->type)
{
case circle::TensorType_INT32:
default:
return false;
}
- const auto &tensor_2 = tensors.at(inputs[2]);
+ const auto &tensor_2 = tensors.at(inputs.at(2));
switch (tensor_2->type)
{
case circle::TensorType_INT32:
}
// Only support input shape dimension 3 and 4 only
- const auto &tensor_0 = tensors.at(inputs[0]);
+ const auto &tensor_0 = tensors.at(inputs.at(0));
const auto t_0_s = tensor_0->shape.size();
if (t_0_s != 3 && t_0_s != 4)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleBatchToSpaceND>();
- node->input(inputs[0]);
- node->block_shape(inputs[1]);
- node->crops(inputs[2]);
+ node->input(inputs.at(0));
+ node->block_shape(inputs.at(1));
+ node->crops(inputs.at(2));
// No options for BatchToSpaceND
const circle::TensorT &output_tensor = *tensors[outputs[0]];
auto name = tensor_name(output_tensor);
- const auto &tensor_in = tensors.at(inputs[0]);
+ const auto &tensor_in = tensors.at(inputs.at(0));
if (tensor_in->type != options->in_data_type)
{
if (settings->get(luci::UserSettings::Key::DisableValidation))
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleCast>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
const auto *options = op.builtin_options.AsCastOptions();
if (options != nullptr)
}
else
{
- node->in_data_type(inputs[0]->dtype());
+ node->in_data_type(inputs.at(0)->dtype());
node->out_data_type(loco::DataType::Unknown);
// type inference should use node->dtype() for Unknown
// export should use BuiltinOptions_NONE for Unknown
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleCeil>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleConv2D>();
- node->input(inputs[0]);
- node->filter(inputs[1]);
+ node->input(inputs.at(0));
+ node->filter(inputs.at(1));
// For now, bias is required (checked in `verify` method).
assert(inputs.size() == 3);
- node->bias(inputs[2]);
+ node->bias(inputs.at(2));
const auto *options = op.builtin_options.AsConv2DOptions();
node->padding(luci_padding(options->padding));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleCos>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
// No options for Cos
const auto &tensors = args.reader.tensors();
- if (tensors[outputs[0]]->type != tensors[inputs[0]]->type)
+ if (tensors[outputs[0]]->type != tensors[inputs.at(0)]->type)
{
return false;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleDepthToSpace>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
const auto *options = op.builtin_options.AsDepthToSpaceOptions();
node->block_size(options->block_size);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleDepthwiseConv2D>();
- node->input(inputs[0]);
- node->filter(inputs[1]);
+ node->input(inputs.at(0));
+ node->filter(inputs.at(1));
if (inputs.size() != 3)
throw oops::UserExn("DepthwiseConv2d without bias is unsupported");
- node->bias(inputs[2]);
+ node->bias(inputs.at(2));
const auto *options = op.builtin_options.AsDepthwiseConv2DOptions();
node->padding(luci_padding(options->padding));
loco::Graph *graph) const
{
auto node = graph->nodes()->create<CircleDiv>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
const auto *options = op.builtin_options.AsDivOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleElu>();
- node->features(inputs[0]);
+ node->features(inputs.at(0));
return node;
}
const auto &tensors = args.reader.tensors();
- return tensors[inputs[0]]->type == tensors[inputs[1]]->type;
+ return tensors[inputs.at(0)]->type == tensors[inputs.at(1)]->type;
}
CircleNode *CircleEqualGraphBuilder::build_node(const circle::OperatorT &,
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleEqual>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
// input type check
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_FLOAT16:
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleExp>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
const auto &tensors = args.reader.tensors();
- return tensors[inputs[1]]->type == circle::TensorType_INT32;
+ return tensors[inputs.at(1)]->type == circle::TensorType_INT32;
}
CircleNode *CircleExpandDimsGraphBuilder::build_node(const circle::OperatorT &,
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleExpandDims>();
- node->input(inputs[0]);
- node->axis(inputs[1]);
+ node->input(inputs.at(0));
+ node->axis(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleFill>();
- node->dims(inputs[0]);
- node->value(inputs[1]);
+ node->dims(inputs.at(0));
+ node->value(inputs.at(1));
const auto *options = op.builtin_options.AsFillOptions();
(void)options;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleFloor>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
}
const auto &tensors = args.reader.tensors();
- const auto &tensor_in_0 = tensors.at(inputs[0]);
- const auto &tensor_in_1 = tensors.at(inputs[1]);
+ const auto &tensor_in_0 = tensors.at(inputs.at(0));
+ const auto &tensor_in_1 = tensors.at(inputs.at(1));
const auto &tensor_out = tensors.at(outputs[0]);
if (tensor_in_0->type != tensor_in_1->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleFloorDiv>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_in_0 = tensors.at(inputs[0]);
- const auto &tensor_in_1 = tensors.at(inputs[1]);
+ const auto &tensor_in_0 = tensors.at(inputs.at(0));
+ const auto &tensor_in_1 = tensors.at(inputs.at(1));
if (tensor_in_0->type != tensor_in_1->type)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleFloorMod>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleFullyConnected>();
- node->input(inputs[0]);
- node->weights(inputs[1]);
- node->bias(inputs[2]); // bias is optional
+ node->input(inputs.at(0));
+ node->weights(inputs.at(1));
+ node->bias(inputs.at(2)); // bias is optional
// TODO Find and move to appropriate place for setting optional input
if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
{
auto *node = graph->nodes()->create<CircleGather>();
- node->params(inputs[0]);
- node->indices(inputs[1]);
+ node->params(inputs.at(0));
+ node->indices(inputs.at(1));
const auto *options = op.builtin_options.AsGatherOptions();
node->axis(options->axis);
if (outputs.size() != 1)
return false;
- auto &indices_tensor = args.reader.tensors()[inputs[1]];
+ auto &indices_tensor = args.reader.tensors()[inputs.at(1)];
if (!(indices_tensor->type == circle::TensorType::TensorType_INT32 ||
indices_tensor->type == circle::TensorType::TensorType_INT64))
{
auto *node = graph->nodes()->create<CircleGatherNd>();
- node->params(inputs[0]);
- node->indices(inputs[1]);
+ node->params(inputs.at(0));
+ node->indices(inputs.at(1));
// GatherNd options empty
const auto &tensors = args.reader.tensors();
- if (tensors[inputs[0]]->type != tensors[inputs[1]]->type)
+ if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
return false;
// NOTE: real models do have output dtype NOT BOOL
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleGreater>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
const auto &tensors = args.reader.tensors();
- if (tensors[inputs[0]]->type != tensors[inputs[1]]->type)
+ if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
{
return false;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleGreaterEqual>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
// input 0 should be BOOL type
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
if (tensor->type != circle::TensorType_BOOL)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleInstanceNorm>();
- node->input(inputs[0]);
- node->gamma(inputs[1]);
- node->beta(inputs[2]);
+ node->input(inputs.at(0));
+ node->gamma(inputs.at(1));
+ node->beta(inputs.at(2));
const auto *options = op.builtin_options.AsInstanceNormOptions();
node->epsilon(options->epsilon);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleL2Normalize>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
const auto *options = op.builtin_options.AsL2NormOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleL2Pool2D>();
- node->value(inputs[0]);
+ node->value(inputs.at(0));
const auto *options = op.builtin_options.AsPool2DOptions();
node->padding(luci_padding(options->padding));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLeakyRelu>();
- node->features(inputs[0]);
+ node->features(inputs.at(0));
const auto *options = op.builtin_options.AsLeakyReluOptions();
node->alpha(options->alpha);
}
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
return false;
}
- if (tensors[inputs[1]]->type != tensor->type)
+ if (tensors[inputs.at(1)]->type != tensor->type)
{
return false;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLess>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
const auto &tensors = args.reader.tensors();
- if (tensors[inputs[0]]->type != tensors[inputs[1]]->type)
+ if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
{
return false;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLessEqual>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLocalResponseNormalization>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
const auto *options = op.builtin_options.AsLocalResponseNormalizationOptions();
node->radius(options->radius);
// Must be one of bfloat16, half, float32, float64, complex64, complex128.
// Currently circle supports half(float16), float32, float64, complex64.
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_FLOAT16:
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLog>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
// No options for Log
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLogSoftmax>();
- node->logits(inputs[0]);
+ node->logits(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLogicalAnd>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
// Only BOOL type is allowed for the input
const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
if (tensor->type != circle::TensorType::TensorType_BOOL)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLogicalNot>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLogicalOr>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
if (outputs.size() != 1)
return false;
- // Must be one of the following types
- // float16, float32, float64, complex64, or complex128
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
- switch (tensor->type)
- {
- case circle::TensorType_FLOAT16:
- case circle::TensorType_FLOAT32:
- case circle::TensorType_FLOAT64:
- case circle::TensorType_COMPLEX64:
- break;
- default:
- return false;
- }
-
- if (tensors.at(inputs[0])->type != tensors.at(outputs[0])->type)
+ if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
return false;
return true;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLogistic>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
if (tensors[outputs[0]]->type != tensor->type)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMatrixDiag>();
- node->diagonal(inputs[0]);
+ node->diagonal(inputs.at(0));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
if (tensors[outputs[0]]->type != tensor->type)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMatrixSetDiag>();
- node->input(inputs[0]);
- node->diagonal(inputs[1]);
+ node->input(inputs.at(0));
+ node->diagonal(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMaxPool2D>();
- node->value(inputs[0]);
+ node->value(inputs.at(0));
const auto *options = op.builtin_options.AsPool2DOptions();
node->padding(luci_padding(options->padding));
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
return false;
}
- if (tensors[inputs[1]]->type != tensor->type)
+ if (tensors[inputs.at(1)]->type != tensor->type)
return false;
if (tensors[outputs[0]]->type != tensor->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMaximum>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMean>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
return false;
}
- if (tensors[inputs[1]]->type != tensor->type)
+ if (tensors[inputs.at(1)]->type != tensor->type)
return false;
if (tensors[outputs[0]]->type != tensor->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMinimum>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMirrorPad>();
- node->input(inputs[0]);
- node->paddings(inputs[1]);
+ node->input(inputs.at(0));
+ node->paddings(inputs.at(1));
const auto *options = op.builtin_options.AsMirrorPadOptions();
node->mode(luci_mirrorpad_mode(options->mode));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMul>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
const auto *options = op.builtin_options.AsMulOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleNeg>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleNonMaxSuppressionV4.h"
+
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV4.h>
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV4Out.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CircleNonMaxSuppressionV4GraphBuilder::validate(const ValidateArgs &args) const
+{
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+
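+  // NonMaxSuppressionV4 takes 5 inputs (boxes, scores, max_output_size, iou_threshold,
+  // score_threshold) and produces 2 outputs (selected_indices, valid_outputs)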
+ if (inputs.size() != 5)
+ return false;
+ if (outputs.size() != 2)
+ return false;
+
+ const auto &tensors = args.reader.tensors();
+ const auto &boxes_tensor = tensors.at(inputs[0]);
+ if (boxes_tensor->shape.size() != 2)
+ return false;
+ if (boxes_tensor->shape.at(1) != 4)
+ return false;
+ if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0))
+ return false;
+
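+  // max_output_size must be INT32; iou_threshold and score_threshold must be FLOAT32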
+ if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+ return false;
+ if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+ return false;
+ if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+ return false;
+
+ return true;
+}
+
+/**
+ * @brief NonMaxSuppressionV4 Node builder
+ *
+ * @note Current loco does not provide multiple outputs.
+ *       We create multiple CircleNonMaxSuppressionV4Out nodes to emulate this.
+ */
+
+void CircleNonMaxSuppressionV4GraphBuilder::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ auto graph = context->graph();
+
+ const std::vector<int32_t> &inputs = op.inputs;
+ const std::vector<int32_t> &outputs = op.outputs;
+ const auto &tensors = context->reader()->tensors();
+ const auto &opcodes = context->reader()->opcodes();
+ auto tensors_ptr = context->reader()->tensors_ptr();
+ assert(tensors_ptr != nullptr);
+
+ std::vector<CircleNode *> input_nodes;
+ for (const int32_t input_tensor_index : inputs)
+ {
+ input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
+ }
+
+ // Create CircleNonMaxSuppressionV4
+ auto node = graph->nodes()->create<CircleNonMaxSuppressionV4>();
+ node->boxes(input_nodes[0]);
+ node->scores(input_nodes[1]);
+ node->max_output_size(input_nodes[2]);
+ node->iou_threshold(input_nodes[3]);
+ node->score_threshold(input_nodes[4]);
+
+ assert(outputs.size() == 2);
+ {
+    // Use the name of output 0 as the NonMaxSuppressionV4 name
+ const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ node->name(tensor_name(output_tensor));
+ node->op_version(opcodes[op.opcode_index].get()->version);
+
+    // NOTE Quantization is not set on NonMaxSuppressionV4 itself but on its virtual outputs
+ }
+
+ // Create virtual outputs of NonMaxSuppressionV4
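+  // output 0 holds selected_indices, output 1 holds valid_outputs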
+ for (size_t n = 0; n < outputs.size(); ++n)
+ {
+ const circle::TensorT &output_tensor = *tensors[outputs[n]];
+
+ auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV4Out>();
+ copy_tensor_attributes(output_tensor, nodeout);
+
+ // mark shape_status
+ if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+ nodeout->shape_status(ShapeStatus::NOSHAPE);
+ else
+ nodeout->shape_status(ShapeStatus::VALID);
+
+ nodeout->input(node);
+ nodeout->index(n);
+
+ context->nodefinder()->enroll(outputs[n], nodeout);
+ }
+}
+
+} // namespace luci
const auto &tensors = args.reader.tensors();
- if (tensors[inputs[0]]->type != tensors[inputs[1]]->type)
+ if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
{
return false;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleNotEqual>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &indices = tensors.at(inputs[0]);
- const auto &depth = tensors.at(inputs[1]);
- const auto &on_value = tensors.at(inputs[2]);
- const auto &off_value = tensors.at(inputs[3]);
+ const auto &indices = tensors.at(inputs.at(0));
+ const auto &depth = tensors.at(inputs.at(1));
+ const auto &on_value = tensors.at(inputs.at(2));
+ const auto &off_value = tensors.at(inputs.at(3));
if (options->axis < -1 || options->axis > static_cast<int32_t>(indices->shape.size()))
return false;
{
auto *node = graph->nodes()->create<CircleOneHot>();
- node->indices(inputs[0]);
- node->depth(inputs[1]);
- node->on_value(inputs[2]);
- node->off_value(inputs[3]);
+ node->indices(inputs.at(0));
+ node->depth(inputs.at(1));
+ node->on_value(inputs.at(2));
+ node->off_value(inputs.at(3));
const auto *options = op.builtin_options.AsOneHotOptions();
node->axis(options->axis);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CirclePRelu>();
- node->input(inputs[0]);
- node->alpha(inputs[1]);
+ node->input(inputs.at(0));
+ node->alpha(inputs.at(1));
// PRelu options are empty
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CirclePad>();
- node->input(inputs[0]);
- node->paddings(inputs[1]);
+ node->input(inputs.at(0));
+ node->paddings(inputs.at(1));
const auto *options = op.builtin_options.AsPadOptions();
(void)options; // There are no options.
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CirclePow>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
// Pow options are empty
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRange>();
- node->start(inputs[0]);
- node->limit(inputs[1]);
- node->delta(inputs[2]);
+ node->start(inputs.at(0));
+ node->limit(inputs.at(1));
+ node->delta(inputs.at(2));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRank>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_0 = tensors.at(inputs[0]);
- const auto &tensor_1 = tensors.at(inputs[1]);
+ const auto &tensor_0 = tensors.at(inputs.at(0));
+ const auto &tensor_1 = tensors.at(inputs.at(1));
const auto &tensor_o = tensors.at(outputs[0]);
if (tensor_0->type != circle::TensorType_BOOL)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReduceAny>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_axis = tensors.at(inputs[1]);
+ const auto &tensor_axis = tensors.at(inputs.at(1));
switch (tensor_axis->type)
{
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReduceMax>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_axis = tensors.at(inputs[1]);
+ const auto &tensor_axis = tensors.at(inputs.at(1));
switch (tensor_axis->type)
{
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReduceMin>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs[1]);
+ const auto &tensor_1 = tensors.at(inputs.at(1));
// TODO check input types
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReduceProd>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRelu>();
- node->features(inputs[0]);
+ node->features(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRelu6>();
- node->features(inputs[0]);
+ node->features(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReluN1To1>();
- node->features(inputs[0]);
+ node->features(inputs.at(0));
return node;
}
{
// If the second input is not provided, generate it based on the value of the attribute.
// TODO Presence of the second input is the current requirement of the IR.
- auto *shape_node = (inputs.size() == 2) ? inputs[1] : nullptr;
+ auto *shape_node = (inputs.size() == 2) ? inputs.at(1) : nullptr;
if (shape_node == nullptr)
{
const auto *options = op.builtin_options.AsReshapeOptions();
}
auto *node = graph->nodes()->create<CircleReshape>();
- node->tensor(inputs[0]);
+ node->tensor(inputs.at(0));
node->shape(shape_node);
const auto *options = op.builtin_options.AsReshapeOptions();
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleResizeBilinear>();
- node->input(inputs[0]);
- node->size(inputs[1]);
+ node->input(inputs.at(0));
+ node->size(inputs.at(1));
const auto *options = op.builtin_options.AsResizeBilinearOptions();
node->align_corners(options->align_corners);
const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleResizeNearestNeighbor>();
- node->input(inputs[0]);
- node->size(inputs[1]);
+ node->input(inputs.at(0));
+ node->size(inputs.at(1));
const auto *options = op.builtin_options.AsResizeNearestNeighborOptions();
node->align_corners(options->align_corners);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs[0]);
- const auto &tensor_lengths = tensors.at(inputs[1]);
+ const auto &tensor_in = tensors.at(inputs.at(0));
+ const auto &tensor_lengths = tensors.at(inputs.at(1));
const auto &tensor_out = tensors.at(outputs[0]);
switch (tensor_lengths->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReverseSequence>();
- node->input(inputs[0]);
- node->seq_lengths(inputs[1]);
+ node->input(inputs.at(0));
+ node->seq_lengths(inputs.at(1));
const auto *options = op.builtin_options.AsReverseSequenceOptions();
node->seq_axis(options->seq_dim);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs[0]);
- const auto &tensor_axis = tensors.at(inputs[1]);
+ const auto &tensor_in = tensors.at(inputs.at(0));
+ const auto &tensor_axis = tensors.at(inputs.at(1));
const auto &tensor_out = tensors.at(outputs[0]);
switch (tensor_axis->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReverseV2>();
- node->tensor(inputs[0]);
- node->axis(inputs[1]);
+ node->tensor(inputs.at(0));
+ node->axis(inputs.at(1));
return node;
}
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs[0]);
+ const auto &tensor_in = tensors.at(inputs.at(0));
const auto &tensor_out = tensors.at(outputs[0]);
switch (tensor_in->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRound>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_FLOAT16:
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRsqrt>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
// indices must have the same type as shape
const auto &tensors = args.reader.tensors();
- if (tensors[inputs[0]]->type != tensors[inputs[2]]->type)
+ if (tensors[inputs.at(0)]->type != tensors[inputs.at(2)]->type)
return false;
// indices must be either int32 or int64
- if (tensors[inputs[0]]->type != circle::TensorType_INT32 &&
- tensors[inputs[0]]->type != circle::TensorType_INT64)
+ if (tensors[inputs.at(0)]->type != circle::TensorType_INT32 &&
+ tensors[inputs.at(0)]->type != circle::TensorType_INT64)
return false;
return true;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleScatterNd>();
- node->indices(inputs[0]);
- node->updates(inputs[1]);
- node->shape(inputs[2]);
+ node->indices(inputs.at(0));
+ node->updates(inputs.at(1));
+ node->shape(inputs.at(2));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs[0]);
+ const auto &tensor_in = tensors.at(inputs.at(0));
const auto &tensor_out = tensors.at(outputs[0]);
- const auto &tensor_ids = tensors.at(inputs[1]);
+ const auto &tensor_ids = tensors.at(inputs.at(1));
switch (tensor_ids->type)
{
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSegmentSum>();
- node->input(inputs[0]);
- node->segment_ids(inputs[1]);
+ node->input(inputs.at(0));
+ node->segment_ids(inputs.at(1));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
if (tensor->type != circle::TensorType_BOOL)
return false;
// TODO check dtypes for input 1, 2
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSelect>();
- node->condition(inputs[0]);
- node->t(inputs[1]);
- node->e(inputs[2]);
+ node->condition(inputs.at(0));
+ node->t(inputs.at(1));
+ node->e(inputs.at(2));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &condition = tensors.at(inputs[0]);
+ const auto &condition = tensors.at(inputs.at(0));
if (condition->type != circle::TensorType_BOOL)
return false;
- const auto &t = tensors.at(inputs[1]);
- const auto &e = tensors.at(inputs[2]);
+ const auto &t = tensors.at(inputs.at(1));
+ const auto &e = tensors.at(inputs.at(2));
if (t->type != e->type)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSelectV2>();
- node->condition(inputs[0]);
- node->t(inputs[1]);
- node->e(inputs[2]);
+ node->condition(inputs.at(0));
+ node->t(inputs.at(1));
+ node->e(inputs.at(2));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleShape>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
const auto *options = op.builtin_options.AsShapeOptions();
node->out_type(luci_datatype(options->out_type));
// input type check
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_FLOAT16:
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSin>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
// No options for Sin
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSlice>();
- node->input(inputs[0]);
- node->begin(inputs[1]);
- node->size(inputs[2]);
+ node->input(inputs.at(0));
+ node->begin(inputs.at(1));
+ node->size(inputs.at(2));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSoftmax>();
- node->logits(inputs[0]);
+ node->logits(inputs.at(0));
const auto *options = op.builtin_options.AsSoftmaxOptions();
node->beta(options->beta);
// input 1 and 2 should have INT32/INT64 type
const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs[1]);
+ const auto &tensor_1 = tensors.at(inputs.at(1));
switch (tensor_1->type)
{
case circle::TensorType_INT32:
default:
return false;
}
- const auto &tensor_2 = tensors.at(inputs[2]);
+ const auto &tensor_2 = tensors.at(inputs.at(2));
switch (tensor_2->type)
{
case circle::TensorType_INT32:
}
// Only support input shape dimension 3 and 4 only
- const auto &tensor_0 = tensors.at(inputs[0]);
+ const auto &tensor_0 = tensors.at(inputs.at(0));
const auto t_0_s = tensor_0->shape.size();
if (t_0_s != 3 && t_0_s != 4)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSpaceToBatchND>();
- node->input(inputs[0]);
- node->block_shape(inputs[1]);
- node->paddings(inputs[2]);
+ node->input(inputs.at(0));
+ node->block_shape(inputs.at(1));
+ node->paddings(inputs.at(2));
// No options for SpaceToBatchND
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSpaceToDepth>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
const auto *options = op.builtin_options.AsSpaceToDepthOptions();
node->block_size(options->block_size);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSparseToDense>();
- node->indices(inputs[0]);
- node->output_shape(inputs[1]);
- node->values(inputs[2]);
- node->default_value(inputs[3]);
+ node->indices(inputs.at(0));
+ node->output_shape(inputs.at(1));
+ node->values(inputs.at(2));
+ node->default_value(inputs.at(3));
const auto *options = op.builtin_options.AsSparseToDenseOptions();
node->validate_indices(options->validate_indices);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSqrt>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_INT32:
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSquare>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
// Inputs must be one of the following types
// bfloat16, half(float16), float32, float64, int32, int64, complex64, complex128
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_FLOAT16:
}
// Input types must match
- if (tensors.at(inputs[0])->type != tensors.at(inputs[1])->type)
+ if (tensors.at(inputs.at(0))->type != tensors.at(inputs.at(1))->type)
return false;
// Input and output types must match
- if (tensors.at(inputs[0])->type != tensors.at(outputs[0])->type)
+ if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
return false;
return true;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSquaredDifference>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSqueeze>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
const auto *options = op.builtin_options.AsSqueezeOptions();
assert(options);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleStridedSlice>();
- node->input(inputs[0]);
- node->begin(inputs[1]);
- node->end(inputs[2]);
- node->strides(inputs[3]);
+ node->input(inputs.at(0));
+ node->begin(inputs.at(1));
+ node->end(inputs.at(2));
+ node->strides(inputs.at(3));
const auto *options = op.builtin_options.AsStridedSliceOptions();
node->begin_mask(options->begin_mask);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSub>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
const auto *options = op.builtin_options.AsSubOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSum>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
const auto &inputs = args.op.inputs;
if (inputs.size() != 1)
return false;
+ const auto &outputs = args.op.outputs;
+ if (outputs.size() != 1)
+ return false;
- // Must be one of the following types
- // bfloat16, half (float16), float32, float64, complex64, complex128
- // Currently, circle supports float16, float32, complex64
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
- switch (tensor->type)
- {
- case circle::TensorType_FLOAT16:
- case circle::TensorType_FLOAT32:
- case circle::TensorType_COMPLEX64:
- break;
- default:
- return false;
- }
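+  // Input and output types must match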
+ if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+ return false;
return true;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleTanh>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
if (outputs.size() != 1)
return false;
- // Multiples (inputs[1]) must be one of the following types
+ // Multiples (inputs.at(1)) must be one of the following types
// int32, int64
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[1]);
+ const auto &tensor = tensors.at(inputs.at(1));
switch (tensor->type)
{
case circle::TensorType_INT32:
}
// Type of input and output must be the same
- if (tensors.at(inputs[0])->type != tensors.at(outputs[0])->type)
+ if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
return false;
return true;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleTile>();
- node->input(inputs[0]);
- node->multiples(inputs[1]);
+ node->input(inputs.at(0));
+ node->multiples(inputs.at(1));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[1]);
+ const auto &tensor = tensors.at(inputs.at(1));
if (tensor->type != circle::TensorType_INT32)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleTranspose>();
- node->a(inputs[0]);
- node->perm(inputs[1]);
+ node->a(inputs.at(0));
+ node->perm(inputs.at(1));
const auto *options = op.builtin_options.AsTransposeOptions();
(void)options;
if (args.op.inputs.size() != 3)
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &tensors = args.reader.tensors();
+ const auto &filter_tensor = tensors.at(inputs.at(1));
+ const auto &filter_shape = filter_tensor.get()->shape;
+ const auto &ifm_tensor = tensors.at(inputs.at(2));
+ const auto &ifm_shape = ifm_tensor.get()->shape;
+
+  // ifm and filters must be 4-D tensors
+ if (ifm_shape.size() != 4)
+ return false;
+ if (filter_shape.size() != 4)
+ return false;
+
+ // input shape : [batch, height, width, in_channels]
+  // filters shape : [output_channels, height, width, in_channels]
+ if (ifm_tensor.get()->shape.at(3) != filter_tensor.get()->shape.at(3))
+ return false;
+
return true;
}
{
auto *node = graph->nodes()->create<CircleTransposeConv>();
- node->inputSizes(inputs[0]);
- node->filter(inputs[1]);
- node->outBackprop(inputs[2]);
+ node->inputSizes(inputs.at(0));
+ node->filter(inputs.at(1));
+ node->outBackprop(inputs.at(2));
const auto *options = op.builtin_options.AsTransposeConvOptions();
node->padding(luci_padding(options->padding));
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleUnique.h"
+
+#include <luci/IR/Nodes/CircleUnique.h>
+#include <luci/IR/Nodes/CircleUniqueOut.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleUniqueGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 1)
+ return false;
+
+ if (args.op.outputs.size() != 2)
+ return false;
+
+ return true;
+}
+
+void CircleUniqueGraphBuilder::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ auto graph = context->graph();
+
+ const std::vector<int32_t> &inputs = op.inputs;
+ const std::vector<int32_t> &outputs = op.outputs;
+ const auto &tensors = context->reader()->tensors();
+ auto tensors_ptr = context->reader()->tensors_ptr();
+ assert(tensors_ptr != nullptr);
+
+ std::vector<CircleNode *> input_nodes;
+ for (const int32_t input_tensor_index : inputs)
+ {
+ input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
+ }
+
+ // Create CircleUnique
+ auto node = graph->nodes()->create<CircleUnique>();
+ node->input(input_nodes[0]);
+
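+  // idx_out_type selects the dtype of the indices output (INT32 or INT64)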
+ const auto *options = op.builtin_options.AsUniqueOptions();
+ node->output_type(luci_datatype(options->idx_out_type));
+
+ assert(int32_t(outputs.size()) == 2);
+  // Use the name of output 0 as the Unique name
+ const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ node->name(tensor_name(output_tensor));
+
+ // Create virtual outputs of Unique
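+  // output 0 holds the unique elements, output 1 holds the indices into the input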
+ for (int32_t n = 0; n < 2; ++n)
+ {
+ const circle::TensorT &output_tensor = *tensors[outputs[n]];
+
+ auto *nodeout = graph->nodes()->create<CircleUniqueOut>();
+ copy_tensor_attributes(output_tensor, nodeout);
+ // mark shape_status
+ if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+ nodeout->shape_status(ShapeStatus::NOSHAPE);
+ else
+ nodeout->shape_status(ShapeStatus::VALID);
+
+ nodeout->input(node);
+ nodeout->index(n);
+
+ context->nodefinder()->enroll(outputs[n], nodeout);
+ }
+}
+
+} // namespace luci
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
const auto &shape = tensor->shape;
auto shape_size = static_cast<int32_t>(shape.size());
if (shape_size > 0)
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_condition = tensors.at(inputs[0]);
+ const auto &tensor_condition = tensors.at(inputs.at(0));
const auto &tensor_out = tensors.at(outputs[0]);
if (tensor_condition->type != circle::TensorType_BOOL)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleWhere>();
- node->condition(inputs[0]);
+ node->condition(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleZerosLike>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
// ZerosLikeOptinos are empty
#include "Nodes/CircleMirrorPad.h"
#include "Nodes/CircleMul.h"
#include "Nodes/CircleNeg.h"
+#include "Nodes/CircleNonMaxSuppressionV4.h"
#include "Nodes/CircleNotEqual.h"
#include "Nodes/CircleOneHot.h"
#include "Nodes/CirclePack.h"
#include "Nodes/CirclePad.h"
+#include "Nodes/CirclePadV2.h"
#include "Nodes/CirclePow.h"
#include "Nodes/CirclePRelu.h"
#include "Nodes/CircleRange.h"
#include "Nodes/CircleTopKV2.h"
#include "Nodes/CircleTranspose.h"
#include "Nodes/CircleTransposeConv.h"
+#include "Nodes/CircleUnique.h"
#include "Nodes/CircleUnpack.h"
#include "Nodes/CircleWhere.h"
#include "Nodes/CircleWhile.h"
#include "Nodes/CircleOutput.h"
#include "Nodes/CircleCustomOut.h"
#include "Nodes/CircleIfOut.h"
+#include "Nodes/CircleNonMaxSuppressionV4Out.h"
#include "Nodes/CircleUnpackOut.h"
+#include "Nodes/CircleUniqueOut.h"
#include "Nodes/CircleSplitOut.h"
#include "Nodes/CircleSplitVOut.h"
#include "Nodes/CircleTopKV2Out.h"
CIRCLE_NODE(CAST, luci::CircleCast)
CIRCLE_NODE(CEIL, luci::CircleCeil)
CIRCLE_NODE(CONCATENATION, luci::CircleConcatenation)
-CIRCLE_NODE(CONST, luci::CircleConst)
CIRCLE_NODE(CONV_2D, luci::CircleConv2D)
CIRCLE_NODE(COS, luci::CircleCos)
CIRCLE_NODE(CUSTOM, luci::CircleCustom)
CIRCLE_NODE(MIRROR_PAD, luci::CircleMirrorPad)
CIRCLE_NODE(MUL, luci::CircleMul)
CIRCLE_NODE(NEG, luci::CircleNeg)
+CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, luci::CircleNonMaxSuppressionV4)
CIRCLE_NODE(NOT_EQUAL, luci::CircleNotEqual)
CIRCLE_NODE(ONE_HOT, luci::CircleOneHot)
CIRCLE_NODE(PACK, luci::CirclePack)
CIRCLE_NODE(PAD, luci::CirclePad)
+CIRCLE_NODE(PADV2, luci::CirclePadV2)
CIRCLE_NODE(POW, luci::CirclePow)
CIRCLE_NODE(PRELU, luci::CirclePRelu)
CIRCLE_NODE(RANGE, luci::CircleRange)
CIRCLE_NODE(TOPK_V2, luci::CircleTopKV2)
CIRCLE_NODE(TRANSPOSE, luci::CircleTranspose)
CIRCLE_NODE(TRANSPOSE_CONV, luci::CircleTransposeConv)
+CIRCLE_NODE(UNIQUE, luci::CircleUnique)
CIRCLE_NODE(UNPACK, luci::CircleUnpack)
CIRCLE_NODE(WHERE, luci::CircleWhere)
CIRCLE_NODE(WHILE, luci::CircleWhile)
CIRCLE_NODE(BCQ_GATHER, luci::CircleBCQGather)
CIRCLE_NODE(INSTANCE_NORM, luci::CircleInstanceNorm)
// Virtual node(s)
+CIRCLE_NODE(CIRCLECONST, luci::CircleConst)
CIRCLE_NODE(CIRCLEINPUT, luci::CircleInput)
CIRCLE_NODE(CIRCLEOUTPUT, luci::CircleOutput)
CIRCLE_NODE(CIRCLEOUTPUTDUMMY, luci::CircleOutputDummy)
CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, luci::CircleOutputExclude)
CIRCLE_NODE(CIRCLECUSTOMOUT, luci::CircleCustomOut)
CIRCLE_NODE(CIRCLEIFOUT, luci::CircleIfOut)
+CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out)
CIRCLE_NODE(CIRCLESPLITOUT, luci::CircleSplitOut)
CIRCLE_NODE(CIRCLESPLITVOUT, luci::CircleSplitVOut)
CIRCLE_NODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out)
+CIRCLE_NODE(CIRCLEUNIQUEOUT, luci::CircleUniqueOut)
CIRCLE_NODE(CIRCLEUNPACKOUT, luci::CircleUnpackOut)
CIRCLE_NODE(CIRCLEWHILEOUT, luci::CircleWhileOut)
std::vector<float> max;
std::vector<float> scale;
std::vector<int64_t> zerop;
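+  // axis along which per-channel (per-axis) quantization parameters apply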
+ int32_t quantized_dimension{0};
};
} // namespace luci
* @brief Class to build tensor data
* @note This will not be exported as a specific op
*/
-class CircleConst final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CONST>>
+class CircleConst final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLECONST>>
{
public:
CircleConst() = default;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
+#define __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief NON_MAX_SUPPRESSION_V4 in Circle
+ */
+class CircleNonMaxSuppressionV4 final
+ : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V4>>
+{
+public:
+ loco::Node *boxes(void) const { return at(0)->node(); }
+ void boxes(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *scores(void) const { return at(1)->node(); }
+ void scores(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *max_output_size(void) const { return at(2)->node(); }
+ void max_output_size(loco::Node *node) { at(2)->node(node); }
+
+ loco::Node *iou_threshold(void) const { return at(3)->node(); }
+ void iou_threshold(loco::Node *node) { at(3)->node(node); }
+
+ loco::Node *score_threshold(void) const { return at(4)->node(); }
+ void score_threshold(loco::Node *node) { at(4)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV4OUT_H__
+#define __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV4OUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual NONMAXSUPPRESSIONV4OUT in Circle
+ */
+class CircleNonMaxSuppressionV4Out final
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT>>
+{
+public:
+ CircleNonMaxSuppressionV4Out() = default;
+
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+ int32_t index(void) const { return _index; }
+ void index(int32_t index) { _index = index; }
+
+private:
+ int32_t _index{-1};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV4OUT_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEPADV2_H__
+#define __LUCI_IR_CIRCLEPADV2_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief PADV2 in Circle
+ */
+class CirclePadV2 final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::PADV2>>
+{
+public:
+ CirclePadV2() = default;
+
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *paddings(void) const { return at(1)->node(); }
+ void paddings(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *constant_values(void) const { return at(2)->node(); }
+ void constant_values(loco::Node *node) { at(2)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEPADV2_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEUNIQUE_H__
+#define __LUCI_IR_CIRCLEUNIQUE_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Unique in Circle
+ */
+class CircleUnique final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::UNIQUE>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+ loco::DataType idx_out_type(void) const { return _idx_out_type; }
+ void output_type(loco::DataType ot) { _idx_out_type = ot; }
+
+private:
+ loco::DataType _idx_out_type{loco::DataType::S32};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEUNIQUE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_UNIQUEOUT_H__
+#define __LUCI_IR_CIRCLE_UNIQUEOUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual CIRCLEUNIQUEOUT in Circle
+ */
+class CircleUniqueOut final
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNIQUEOUT>>
+{
+public:
+ CircleUniqueOut() = default;
+
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+ int32_t index(void) const { return _index; }
+ void index(int32_t index) { _index = index; }
+
+private:
+ int32_t _index{-1};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_UNIQUEOUT_H__
{
auto gs = luci::make_module();
- GTEST_SUCCEED();
+ SUCCEED();
}
TEST(ModuleTest, add)
ASSERT_EQ(0, custom_node.custom_code().size());
}
-TEST(CircleCustomTest, constructor_NEG) { ASSERT_DEBUG_DEATH(luci::CircleCustom{0}, ""); }
+TEST(CircleCustomTest, constructor_NEG)
+{
+ ASSERT_DEBUG_DEATH(luci::CircleCustom{0}, "");
+
+ SUCCEED();
+}
TEST(CircleCustomTest, invalidIndex_NEG)
{
TEST(CircleIfTestDeath, invalid_arity_NEG)
{
ASSERT_DEBUG_DEATH(luci::CircleIf very_long_name_if_node(0, 1), "");
+
+ SUCCEED();
}
TEST(CircleIfTestDeath, invalid_output_count_NEG)
{
ASSERT_DEBUG_DEATH(luci::CircleIf if_node(2, 0), "");
+
+ SUCCEED();
}
TEST(CircleIfTestDeath, invalid_input_get_index_NEG)
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV4.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleNonMaxSuppressionV4Test, constructor)
+{
+ luci::CircleNonMaxSuppressionV4 nmsv4_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), nmsv4_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::NON_MAX_SUPPRESSION_V4, nmsv4_node.opcode());
+
+ ASSERT_EQ(nullptr, nmsv4_node.boxes());
+ ASSERT_EQ(nullptr, nmsv4_node.scores());
+ ASSERT_EQ(nullptr, nmsv4_node.max_output_size());
+ ASSERT_EQ(nullptr, nmsv4_node.iou_threshold());
+ ASSERT_EQ(nullptr, nmsv4_node.score_threshold());
+}
+
+TEST(CircleNonMaxSuppressionV4Test, input_NEG)
+{
+ luci::CircleNonMaxSuppressionV4 nmsv4_node;
+ luci::CircleNonMaxSuppressionV4 node;
+
+ nmsv4_node.boxes(&node);
+ nmsv4_node.scores(&node);
+ nmsv4_node.max_output_size(&node);
+ nmsv4_node.iou_threshold(&node);
+ nmsv4_node.score_threshold(&node);
+ ASSERT_NE(nullptr, nmsv4_node.boxes());
+ ASSERT_NE(nullptr, nmsv4_node.scores());
+ ASSERT_NE(nullptr, nmsv4_node.max_output_size());
+ ASSERT_NE(nullptr, nmsv4_node.iou_threshold());
+ ASSERT_NE(nullptr, nmsv4_node.score_threshold());
+
+ nmsv4_node.boxes(nullptr);
+ nmsv4_node.scores(nullptr);
+ nmsv4_node.max_output_size(nullptr);
+ nmsv4_node.iou_threshold(nullptr);
+ nmsv4_node.score_threshold(nullptr);
+ ASSERT_EQ(nullptr, nmsv4_node.boxes());
+ ASSERT_EQ(nullptr, nmsv4_node.scores());
+ ASSERT_EQ(nullptr, nmsv4_node.max_output_size());
+ ASSERT_EQ(nullptr, nmsv4_node.iou_threshold());
+ ASSERT_EQ(nullptr, nmsv4_node.score_threshold());
+}
+
+TEST(CircleNonMaxSuppressionV4Test, arity_NEG)
+{
+ luci::CircleNonMaxSuppressionV4 nmsv4_node;
+
+ ASSERT_NO_THROW(nmsv4_node.arg(4));
+ ASSERT_THROW(nmsv4_node.arg(5), std::out_of_range);
+}
+
+TEST(CircleNonMaxSuppressionV4Test, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleNonMaxSuppressionV4 nmsv4_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(nmsv4_node.accept(&tv), std::exception);
+}
+
+TEST(CircleNonMaxSuppressionV4Test, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleNonMaxSuppressionV4 nmsv4_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(nmsv4_node.accept(&tv), std::exception);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV4Out.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleNonMaxSuppressionV4OutTest, constructor)
+{
+ luci::CircleNonMaxSuppressionV4Out vout_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), vout_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT, vout_node.opcode());
+
+ ASSERT_EQ(nullptr, vout_node.input());
+ ASSERT_EQ(-1, vout_node.index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CirclePadV2.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CirclePadV2Test, constructor_P)
+{
+ luci::CirclePadV2 node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::PADV2, node.opcode());
+
+ ASSERT_EQ(nullptr, node.input());
+ ASSERT_EQ(nullptr, node.paddings());
+ ASSERT_EQ(nullptr, node.constant_values());
+}
+
+TEST(CirclePadV2Test, input_NEG)
+{
+ luci::CirclePadV2 pad_node;
+ luci::CirclePadV2 node;
+
+ pad_node.input(&node);
+ pad_node.paddings(&node);
+ pad_node.constant_values(&node);
+ ASSERT_NE(nullptr, pad_node.input());
+ ASSERT_NE(nullptr, pad_node.paddings());
+ ASSERT_NE(nullptr, pad_node.constant_values());
+
+ pad_node.input(nullptr);
+ pad_node.paddings(nullptr);
+ pad_node.constant_values(nullptr);
+ ASSERT_EQ(nullptr, pad_node.input());
+ ASSERT_EQ(nullptr, pad_node.paddings());
+ ASSERT_EQ(nullptr, pad_node.constant_values());
+}
+
+TEST(CirclePadV2Test, arity_NEG)
+{
+ luci::CirclePadV2 pad_node;
+
+ ASSERT_NO_THROW(pad_node.arg(2));
+ ASSERT_THROW(pad_node.arg(3), std::out_of_range);
+}
+
+TEST(CirclePadV2Test, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CirclePadV2 pad_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(pad_node.accept(&tv), std::exception);
+}
+
+TEST(CirclePadV2Test, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CirclePadV2 pad_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(pad_node.accept(&tv), std::exception);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleUnique.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleUniqueTest, constructor)
+{
+ luci::CircleUnique unique_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), unique_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::UNIQUE, unique_node.opcode());
+
+ ASSERT_EQ(nullptr, unique_node.input());
+}
+
+TEST(CircleUniqueTest, input_NEG)
+{
+ luci::CircleUnique unique_node;
+ luci::CircleUnique node;
+
+ unique_node.input(&node);
+ ASSERT_NE(nullptr, unique_node.input());
+
+ unique_node.input(nullptr);
+ ASSERT_EQ(nullptr, unique_node.input());
+}
+
+TEST(CircleUniqueTest, arity_NEG)
+{
+ luci::CircleUnique unique_node;
+
+ ASSERT_NO_THROW(unique_node.arg(0));
+ ASSERT_THROW(unique_node.arg(1), std::out_of_range);
+}
+
+TEST(CircleUniqueTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleUnique unique_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(unique_node.accept(&tv), std::exception);
+}
+
+TEST(CircleUniqueTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleUnique unique_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(unique_node.accept(&tv), std::exception);
+}
TEST(CircleWhileTestDeath, invalid_arity_NEG)
{
ASSERT_DEBUG_DEATH(luci::CircleWhile very_long_name_while_node(0, 1), "");
+
+ SUCCEED();
}
TEST(CircleWhileTestDeath, invalid_output_count_NEG)
{
ASSERT_DEBUG_DEATH(luci::CircleWhile while_node(2, 0), "");
+
+ SUCCEED();
}
TEST(CircleWhileTestDeath, invalid_input_get_index_NEG)
IMPLEMENT(luci::CircleMirrorPad)
IMPLEMENT(luci::CircleMul)
IMPLEMENT(luci::CircleNeg)
+ IMPLEMENT(luci::CircleNonMaxSuppressionV4)
IMPLEMENT(luci::CircleNotEqual)
IMPLEMENT(luci::CircleOneHot)
IMPLEMENT(luci::CirclePack)
IMPLEMENT(luci::CircleTopKV2)
IMPLEMENT(luci::CircleTranspose)
IMPLEMENT(luci::CircleTransposeConv)
+ IMPLEMENT(luci::CircleUnique)
IMPLEMENT(luci::CircleUnpack)
IMPLEMENT(luci::CircleWhere)
IMPLEMENT(luci::CircleWhile)
IMPLEMENT(luci::CircleInput)
IMPLEMENT(luci::CircleOutput)
IMPLEMENT(luci::CircleIfOut)
+ IMPLEMENT(luci::CircleNonMaxSuppressionV4Out)
IMPLEMENT(luci::CircleSplitOut)
IMPLEMENT(luci::CircleSplitVOut)
IMPLEMENT(luci::CircleTopKV2Out)
+ IMPLEMENT(luci::CircleUniqueOut)
IMPLEMENT(luci::CircleUnpackOut)
IMPLEMENT(luci::CircleWhileOut)
#undef IMPLEMENT
return use_x(tbl(), node, s);
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4 *node,
+ locop::NodeSummary &s) const
+{
+ s.args().append("boxes", pepper::str(node->boxes()));
+ s.args().append("scores", pepper::str(node->scores()));
+ s.args().append("max_output_size", pepper::str(node->max_output_size()));
+ s.args().append("iou_threshold", pepper::str(node->iou_threshold()));
+ s.args().append("score_threshold", pepper::str(node->score_threshold()));
+
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node,
locop::NodeSummary &s) const
{
return true;
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleUnique *node, locop::NodeSummary &s) const
+{
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("idx_out_type", to_str(node->idx_out_type()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpack *node, locop::NodeSummary &s) const
{
s.args().append("value", tbl()->lookup(node->value()));
return true;
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleUniqueOut *node,
+ locop::NodeSummary &s) const
+{
+ s.args().append("unique", tbl()->lookup(node->input()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpackOut *node,
locop::NodeSummary &s) const
{
return use_input(tbl(), node, s);
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4Out *node,
+ locop::NodeSummary &s) const
+{
+ return use_input(tbl(), node, s);
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node,
locop::NodeSummary &s) const
{
{
static const std::vector<std::string> fakeq_supported_input_dtype{"float32"};
static const std::vector<std::string> fakeq_supported_output_dtype{"uint8"};
- static const std::vector<std::string> fakeq_supported_granularity{"layer"};
+ static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};
auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
{
static const std::vector<std::string> qwmm_supported_input_dtype{"float32"};
static const std::vector<std::string> qwmm_supported_output_dtype{"uint8"};
- static const std::vector<std::string> qwmm_supported_granularity{"layer"};
+ static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
const auto index = prefix.find("Tensordot/");
prefix = prefix.substr(0, index - 1);
}
+ else if (prefix.find("/MatMul") != std::string::npos)
+ {
+ const auto index = prefix.find("/MatMul");
+ prefix = prefix.substr(0, index);
+ }
else if (prefix.find("kernel/") != std::string::npos)
{
const auto index = prefix.find("kernel/");
return prefix;
}
+/**
+ * @brief Create a CircleOutputExclude operation that has the same shape and dtype as
+ * the original circle_node.
+ */
+luci::CircleOutputExclude *createNoOp(luci::CircleNode *circle_node)
+{
+ auto graph = circle_node->graph();
+ auto noOp = graph->nodes()->create<luci::CircleOutputExclude>();
+
+ if (circle_node->shape_status() == luci::ShapeStatus::VALID)
+ {
+ noOp->dtype(circle_node->dtype());
+ noOp->rank(circle_node->rank());
+ for (uint32_t i = 0; i < circle_node->rank(); ++i)
+ noOp->dim(i) = circle_node->dim(i);
+ }
+ else
+ {
+ // For type inference
+ noOp->dtype(loco::DataType::FLOAT32);
+ }
+
+ return noOp;
+}
+
} // namespace
namespace
{
-class BCQConverter final
+// V means the version of BCQ.
+template <int32_t V> class BCQFuser;
+
+template <> class BCQFuser<1>
{
public:
+ bool fuseBCQ(loco::Graph *g)
+ {
+ bool changed = false;
+
+ for (auto node : loco::all_nodes(g))
+ {
+ if (auto circle_const = dynamic_cast<luci::CircleConst *>(node))
+ {
+ add_BCQ_info_node(circle_const);
+ }
+ }
+
+ if (!is_bcqinfo_valid())
+ return false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto gather = dynamic_cast<luci::CircleGather *>(node))
+ {
+ auto params = dynamic_cast<luci::CircleConst *>(gather->params());
+ if (params != nullptr && has_BCQ_info(params))
+ {
+ auto bcq_gather = g->nodes()->create<luci::CircleBCQGather>();
+
+ bcq_gather->op_version(1);
+ bcq_gather->input_scales(get_alpha(params));
+ bcq_gather->input_binary(get_packed_binary_code(params));
+ bcq_gather->indices(gather->indices());
+ bcq_gather->input_clusters(packed_clusters(params));
+
+ // input_binary shape : [output_size, hidden_size]
+ const auto binary_hidden_size =
+ loco::must_cast<luci::CircleConst *>(bcq_gather->input_binary())->dim(1).value() * 32;
+ bcq_gather->input_hidden_size(binary_hidden_size);
+
+ if (do_w_x(params))
+ {
+ bcq_gather->axis(gather->axis());
+ }
+ else
+ {
+ const auto axis_transpose = (gather->axis() == 0) ? 1 : 0;
+ bcq_gather->axis(axis_transpose);
+ }
+
+ loco::replace(gather).with(bcq_gather);
+
+ changed = true;
+ }
+ }
+ else if (auto fully_connected = dynamic_cast<luci::CircleFullyConnected *>(node))
+ {
+ auto weights = dynamic_cast<luci::CircleConst *>(fully_connected->weights());
+ if (weights != nullptr && has_BCQ_info(weights))
+ {
+ auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+
+ bcq_fc->op_version(1);
+ bcq_fc->weights_scales(get_alpha(weights));
+ bcq_fc->weights_binary(get_packed_binary_code(weights));
+ bcq_fc->bias(fully_connected->bias());
+ bcq_fc->weights_clusters(packed_clusters(weights));
+ bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction());
+
+ loco::Node *bcq_input = fully_connected->input();
+ int32_t batch_rank = 0;
+
+ // If the input of BCQFullyConnected has rank greater than 2, reshape it to rank 2
+ const auto original_input = loco::must_cast<luci::CircleNode *>(fully_connected->input());
+ if (original_input->shape_status() == luci::ShapeStatus::VALID &&
+ original_input->rank() > 2)
+ {
+ auto new_shape = g->nodes()->create<luci::CircleConst>();
+ new_shape->dtype(loco::DataType::S32);
+ new_shape->size<loco::DataType::S32>(2);
+ new_shape->rank(1);
+ new_shape->dim(0) = 2;
+
+ auto batch_size = 1;
+ for (uint32_t i = 0; i < original_input->rank() - 1; ++i)
+ batch_size *= original_input->dim(i).value();
+
+ new_shape->at<loco::DataType::S32>(0) = batch_size;
+ new_shape->at<loco::DataType::S32>(1) =
+ original_input->dim(original_input->rank() - 1).value();
+ new_shape->shape_status(luci::ShapeStatus::VALID);
+
+ auto reshape = g->nodes()->create<luci::CircleReshape>();
+ reshape->tensor(original_input);
+ reshape->shape(new_shape);
+
+ bcq_input = reshape;
+ batch_rank = original_input->rank() - 2;
+ }
+
+ // For the x_w formation, Transpose nodes should be inserted before and after BCQFullyConnected
+ if (do_w_x(weights))
+ {
+ const auto binary_hidden_size =
+ loco::must_cast<luci::CircleNode *>(fully_connected->input())
+ ->dim(batch_rank)
+ .value();
+ bcq_fc->weights_hidden_size(binary_hidden_size);
+ bcq_fc->input(bcq_input);
+ loco::replace(fully_connected).with(bcq_fc);
+ }
+ else
+ {
+ const auto binary_hidden_size =
+ loco::must_cast<luci::CircleNode *>(fully_connected->input())
+ ->dim(1 + batch_rank)
+ .value();
+ bcq_fc->weights_hidden_size(binary_hidden_size);
+
+ auto perm = g->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(2);
+ perm->rank(1);
+ perm->dim(0) = 2;
+ perm->at<loco::DataType::S32>(0) = 1;
+ perm->at<loco::DataType::S32>(1) = 0;
+ perm->shape_status(luci::ShapeStatus::VALID);
+
+ auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
+ input_transpose->a(bcq_input);
+ input_transpose->perm(perm);
+
+ bcq_fc->input(input_transpose);
+
+ auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
+ output_transpose->a(bcq_fc);
+ output_transpose->perm(perm);
+
+ loco::replace(fully_connected).with(output_transpose);
+ }
+
+ changed = true;
+ }
+ }
+ }
+
+ if (changed)
+ clear_BCQ_nodes();
+
+ return changed;
+ }
+
+private:
void add_BCQ_info_node(luci::CircleConst *node)
{
const auto node_name = node->name();
return has_info;
}
+ /**
+ * @brief Exclude the BCQ information nodes, which are only used for fusing BCQ operations,
+ * from the graph outputs by replacing them with CircleOutputExclude
+ */
+ void clear_BCQ_nodes()
+ {
+ auto clear_nodes = [](std::map<std::string, luci::CircleConst *> &nodes) {
+ for (auto &n : nodes)
+ {
+ auto node = n.second;
+
+ for (auto s : loco::succs(node))
+ {
+ if (auto outnode = dynamic_cast<luci::CircleOutput *>(s))
+ {
+ outnode->from(createNoOp(node));
+ }
+ else if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(s))
+ {
+ for (auto o : loco::succs(reshape_node))
+ {
+ auto circle_output = loco::must_cast<luci::CircleOutput *>(o);
+ circle_output->from(createNoOp(reshape_node));
+ }
+ }
+ }
+ }
+ };
+
+ clear_nodes(_do_w_x);
+ clear_nodes(_alpha);
+ clear_nodes(_packed_binary_code);
+ clear_nodes(_number_of_clusters);
+ clear_nodes(_size_of_clusters);
+ clear_nodes(_qbits_of_clusters);
+ clear_nodes(_dequant_weight);
+ }
+
+ bool is_bcqinfo_valid()
+ {
+ // do_w_x should be int32 or bool type
+ for (auto n : _do_w_x)
+ {
+ if (n.second->dtype() != loco::DataType::BOOL && n.second->dtype() != loco::DataType::S32)
+ return false;
+ }
+
+ return true;
+ }
+
+private:
bool do_w_x(luci::CircleConst *node)
{
const auto prefix = node_name_prefix(node->name());
if (_do_w_x[prefix]->dtype() == loco::DataType::S32)
return _do_w_x[prefix]->at<loco::DataType::S32>(0) == 1;
- else if (_do_w_x[prefix]->dtype() == loco::DataType::BOOL)
- return _do_w_x[prefix]->at<loco::DataType::BOOL>(0);
else
- throw std::runtime_error("do_w_x should be int or bool");
+ return _do_w_x[prefix]->at<loco::DataType::BOOL>(0);
}
luci::CircleConst *get_alpha(luci::CircleConst *node)
return packed_clusters;
}
- /**
- * @brief Exclude BCQ information nodes which are used for fusing BCQ operations
- * from graph output by using CircleOutputExclude
- */
- void clear_BCQ_nodes()
- {
- auto createNoOp = [](luci::CircleNode *circle_node) {
- auto graph = circle_node->graph();
- auto noOp = graph->nodes()->create<luci::CircleOutputExclude>();
-
- if (circle_node->shape_status() == luci::ShapeStatus::VALID)
- {
- noOp->dtype(circle_node->dtype());
- noOp->rank(circle_node->rank());
- for (uint32_t i = 0; i < circle_node->rank(); ++i)
- noOp->dim(i) = circle_node->dim(i);
- }
- else
- {
- // For type inference
- noOp->dtype(loco::DataType::FLOAT32);
- }
-
- return noOp;
- };
-
- auto clear_nodes = [createNoOp](std::map<std::string, luci::CircleConst *> &nodes) {
- for (auto &n : nodes)
- {
- auto node = n.second;
-
- for (auto s : loco::succs(node))
- {
- if (auto outnode = dynamic_cast<luci::CircleOutput *>(s))
- {
- outnode->from(createNoOp(node));
- }
- else if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(s))
- {
- for (auto o : loco::succs(reshape_node))
- {
- auto circle_output = loco::must_cast<luci::CircleOutput *>(o);
- circle_output->from(createNoOp(reshape_node));
- }
- }
- }
- }
- };
-
- clear_nodes(_do_w_x);
- clear_nodes(_alpha);
- clear_nodes(_packed_binary_code);
- clear_nodes(_number_of_clusters);
- clear_nodes(_size_of_clusters);
- clear_nodes(_qbits_of_clusters);
- clear_nodes(_dequant_weight);
- }
-
private:
std::map<std::string, luci::CircleConst *> _do_w_x;
std::map<std::string, luci::CircleConst *> _alpha;
bool FuseBCQPass::run(loco::Graph *g)
{
- BCQConverter converter;
-
bool changed = false;
+ // Find BCQ version information and check validity.
+ luci::CircleConst *version_node = nullptr;
for (auto node : loco::all_nodes(g))
{
if (auto circle_const = dynamic_cast<luci::CircleConst *>(node))
{
- converter.add_BCQ_info_node(circle_const);
- }
- }
-
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
- {
- if (auto gather = dynamic_cast<luci::CircleGather *>(node))
- {
- auto params = dynamic_cast<luci::CircleConst *>(gather->params());
- if (params != nullptr && converter.has_BCQ_info(params))
+ if (circle_const->name().find("/bcqinfo_version") != std::string::npos)
{
- auto bcq_gather = g->nodes()->create<luci::CircleBCQGather>();
-
- bcq_gather->input_scales(converter.get_alpha(params));
- bcq_gather->input_binary(converter.get_packed_binary_code(params));
- bcq_gather->indices(gather->indices());
- bcq_gather->input_clusters(converter.packed_clusters(params));
-
- const auto binary_hidden_size =
- loco::must_cast<luci::CircleConst *>(bcq_gather->input_binary())->dim(1).value() * 32;
- bcq_gather->input_hidden_size(binary_hidden_size);
-
- if (converter.do_w_x(params))
- {
- bcq_gather->axis(gather->axis());
- }
- else
+ // There should be only one bcqinfo_version in the model
+ if (version_node != nullptr)
{
- const auto axis_transpose = (gather->axis() == 0) ? 1 : 0;
- bcq_gather->axis(axis_transpose);
+ assert(false && "Multiple version information found");
+ return false;
}
- loco::replace(gather).with(bcq_gather);
-
- changed = true;
+ version_node = circle_const;
}
}
- else if (auto fully_connected = dynamic_cast<luci::CircleFullyConnected *>(node))
- {
- auto weights = dynamic_cast<luci::CircleConst *>(fully_connected->weights());
- if (weights != nullptr && converter.has_BCQ_info(weights))
- {
- auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
-
- bcq_fc->weights_scales(converter.get_alpha(weights));
- bcq_fc->weights_binary(converter.get_packed_binary_code(weights));
- bcq_fc->bias(fully_connected->bias());
- bcq_fc->weights_clusters(converter.packed_clusters(weights));
- bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction());
-
- loco::Node *bcq_input = fully_connected->input();
- int32_t batch_rank = 0;
+ }
- // If input of BCQFullyConnected has more than rank 2, we should reshape it as rank 2
- const auto original_input = loco::must_cast<luci::CircleNode *>(fully_connected->input());
- if (original_input->shape_status() == ShapeStatus::VALID && original_input->rank() > 2)
- {
- auto new_shape = g->nodes()->create<luci::CircleConst>();
- new_shape->dtype(loco::DataType::S32);
- new_shape->size<loco::DataType::S32>(2);
- new_shape->rank(1);
- new_shape->dim(0) = 2;
-
- auto batch_size = 1;
- for (uint32_t i = 0; i < original_input->rank() - 1; ++i)
- batch_size *= original_input->dim(i).value();
-
- new_shape->at<loco::DataType::S32>(0) = batch_size;
- new_shape->at<loco::DataType::S32>(1) =
- original_input->dim(original_input->rank() - 1).value();
- new_shape->shape_status(ShapeStatus::VALID);
-
- auto reshape = g->nodes()->create<luci::CircleReshape>();
- reshape->tensor(original_input);
- reshape->shape(new_shape);
-
- bcq_input = reshape;
- batch_rank = original_input->rank() - 2;
- }
+ // If version node is not found, regard it as version 1.
+ int32_t bcq_version = (version_node != nullptr) ? version_node->at<loco::DataType::S32>(0) : 1;
- // If x_w formation, we should insert Transpose in front and back of BCQFullyConnected
- if (converter.do_w_x(weights))
- {
- const auto binary_hidden_size =
- loco::must_cast<luci::CircleNode *>(fully_connected->input())
- ->dim(batch_rank)
- .value();
- bcq_fc->weights_hidden_size(binary_hidden_size);
- bcq_fc->input(bcq_input);
- loco::replace(fully_connected).with(bcq_fc);
- }
- else
- {
- const auto binary_hidden_size =
- loco::must_cast<luci::CircleNode *>(fully_connected->input())
- ->dim(1 + batch_rank)
- .value();
- bcq_fc->weights_hidden_size(binary_hidden_size);
-
- auto perm = g->nodes()->create<luci::CircleConst>();
- perm->dtype(loco::DataType::S32);
- perm->size<loco::DataType::S32>(2);
- perm->rank(1);
- perm->dim(0) = 2;
- perm->at<loco::DataType::S32>(0) = 1;
- perm->at<loco::DataType::S32>(1) = 0;
- perm->shape_status(ShapeStatus::VALID);
-
- auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
- input_transpose->a(bcq_input);
- input_transpose->perm(perm);
-
- bcq_fc->input(input_transpose);
-
- auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
- output_transpose->a(bcq_fc);
- output_transpose->perm(perm);
-
- loco::replace(fully_connected).with(output_transpose);
- }
+ if (bcq_version == 1)
+ changed = BCQFuser<1>().fuseBCQ(g);
+ else
+ assert(false && "Not supported BCQ version");
- changed = true;
- }
- }
+ if (changed && version_node != nullptr)
+ {
+ // If BCQ was applied and a version node was found, exclude that node from the graph output.
+ loco::replace(version_node).with(createNoOp(version_node));
}
- if (changed)
- converter.clear_BCQ_nodes();
-
return changed;
}
namespace luci
{
+uint8_t fp32_to_uint8_cast(float f)
+{
+ assert(std::numeric_limits<uint8_t>::min() <= f);
+ assert(f <= std::numeric_limits<uint8_t>::max());
+ return static_cast<uint8_t>(f);
+}
+
void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
float &nudged_min, float &nudged_max)
{
}
else
zero_point_double = qmin_double - rmin / scale;
- if (zero_point_double <= qmin_double)
+ if (min >= 0)
{
assert(min >= 0 && max >= 0);
nudged_zero_point = kMinScale;
if (min > 0 && max > 0)
WARN(l) << "The minimum and maximum values are all positive." << std::endl;
}
- else if (zero_point_double >= qmax_double)
+ else if (max < 0)
{
assert(min < 0 && max < 0);
nudged_zero_point = kMaxScale;
else
{
assert(min < 0 && max >= 0);
- nudged_zero_point = static_cast<uint8_t>(std::round(zero_point_double));
+ nudged_zero_point = fp32_to_uint8_cast(std::round(zero_point_double));
+ }
+
+ // Protect against a very small scale, which can cause the zero point to overflow
+ if (scale < 1e-5)
+ {
+ scale = 1e-5;
+ nudged_zero_point = fp32_to_uint8_cast(std::round(qmin_double - rmin / scale));
}
nudged_min = static_cast<float>((qmin_double - nudged_zero_point) * scale);
node->dtype() == loco::DataType::S32; // bias
}
-void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor)
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
+ int32_t &channel_dim_index)
{
assert(node->dtype() == loco::DataType::FLOAT32);
uint32_t indices[4] = {
0,
};
- int channel_dim_index{0};
if (!get_channel_dim_index(node, dimension, channel_dim_index))
{
}
void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
- std::vector<float> &scaling_factor)
+ std::vector<float> &scaling_factor, int32_t &channel_dim_index)
{
assert(node->dtype() == loco::DataType::FLOAT32);
uint32_t indices[4] = {
0,
};
- int channel_dim_index{0};
if (!get_channel_dim_index(node, dimension, channel_dim_index))
{
if (dw_conv != nullptr && dw_conv->filter() == circle_const)
return true;
+ auto t_conv = dynamic_cast<CircleTransposeConv *>(out);
+ if (t_conv != nullptr && t_conv->filter() == circle_const && circle_const->rank() == 4)
+ return true;
+
auto fc = dynamic_cast<CircleFullyConnected *>(out);
if (fc != nullptr && fc->weights() == circle_const)
return true;
circle_node->dtype(loco::DataType::S16);
}
- circle_node->quantparam()->max[0] = nudged_max;
- circle_node->quantparam()->min[0] = nudged_min;
+ circle_node->quantparam()->min.clear();
+ circle_node->quantparam()->max.clear();
circle_node->quantparam()->scale.push_back(scaling_factor);
circle_node->quantparam()->zerop.push_back(zp);
}
assert(quantparam != nullptr);
auto min = quantparam->min;
auto scaling_factor = quantparam->scale;
+ int32_t channel_dim_index = 0;
if (output_type == loco::DataType::U8)
{
- asym_wquant_per_channel(circle_const, min, scaling_factor);
+ asym_wquant_per_channel(circle_const, min, scaling_factor, channel_dim_index);
}
else
{
- sym_wquant_per_channel(circle_const, scaling_factor);
+ sym_wquant_per_channel(circle_const, scaling_factor, channel_dim_index);
}
+ quantparam->min.clear();
+ quantparam->max.clear();
+ quantparam->quantized_dimension = channel_dim_index;
}
// Find min/max per layer-wise
else
auto min = quantparam->min[0];
auto scaling_factor = quantparam->scale[0];
asym_wquant_per_layer(circle_const, min, scaling_factor);
+ quantparam->min.clear();
+ quantparam->max.clear();
}
}
}
loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); }
+ loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
+ {
+ const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ return loco::NodeShape{boxes_shape};
+ }
+
loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); }
loco::NodeShape visit(const luci::CircleOneHot *node) final
return output_shape;
}
+ loco::NodeShape visit(const luci::CircleUnique *node) final
+ {
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+ assert(input_shape.rank() == 1);
+
+ loco::TensorShape shape_output;
+ shape_output = own_shape(node);
+
+ return loco::NodeShape{shape_output};
+ }
+
loco::NodeShape visit(const luci::CircleTransposeConv *node) final
{
// TransposeConv's output shape is written in its 'inputSizes' argument
return loco::NodeShape{*then_graph_output->shape()};
}
+ loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final
+ {
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input());
+ if (nmsv4 == nullptr)
+ INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly");
+
+ auto index = node->index();
+ if (index == 1)
+ return loco::TensorShape({0});
+
+ assert(index == 0);
+
+ auto unknown = loco::TensorShape{loco::Dimension()};
+ auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size());
+ if (max_output_size == nullptr)
+ return unknown; // we need CircleConst for max output size
+
+ LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
+
+ if (max_output_size->size<S32>() < 1)
+ return unknown;
+
+ auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
+ return loco::TensorShape{max_output_size_value};
+ }
+
loco::NodeShape visit(const luci::CircleSplitOut *node) final
{
const loco::DataType S32 = loco::DataType::S32;
return loco::NodeShape{output_shape};
}
+ loco::NodeShape visit(const luci::CircleUniqueOut *node) final
+ {
+ auto unique = dynamic_cast<const luci::CircleUnique *>(node->input());
+ if (unique == nullptr)
+ {
+ INTERNAL_EXN("CircleUnique IR is not configured correctly");
+ }
+
+ auto unique_shape = loco::shape_get(unique).as<loco::TensorShape>();
+
+ return loco::NodeShape{unique_shape};
+ }
+
loco::NodeShape visit(const luci::CircleUnpackOut *node) final
{
auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input());
loco::DataType visit(const luci::CircleNeg *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleNonMaxSuppressionV4 *node) final
+ {
+ return loco::dtype_get(node->boxes());
+ }
+
loco::DataType visit(const luci::CircleNotEqual *) final { return loco::DataType::BOOL; }
loco::DataType visit(const luci::CirclePack *node) final
return loco::dtype_get(node->tensor());
}
- loco::DataType visit(const luci::CircleResizeBilinear *) final { return loco::DataType::FLOAT32; }
+ loco::DataType visit(const luci::CircleResizeBilinear *node) final
+ {
+ return loco::dtype_get(node->input());
+ }
loco::DataType visit(const luci::CircleResizeNearestNeighbor *node) final
{
return loco::dtype_get(node->outBackprop());
}
+ loco::DataType visit(const luci::CircleUnique *node) final
+ {
+ return loco::dtype_get(node->input());
+ }
+
loco::DataType visit(const luci::CircleUnpack *node) final
{
return loco::dtype_get(node->value());
return then_graph_output->dtype();
}
+ loco::DataType visit(const luci::CircleNonMaxSuppressionV4Out *node) final
+ {
+ (void)node;
+ assert(node->index() == 0 || node->index() == 1);
+ return loco::DataType::S32;
+ }
+
loco::DataType visit(const luci::CircleSplitOut *node) final
{
return loco::dtype_get(node->input());
return loco::DataType::S32;
}
+ loco::DataType visit(const luci::CircleUniqueOut *node) final
+ {
+ if (node->index() == 0)
+ {
+ return loco::dtype_get(node->input());
+ }
+ assert(node->index() == 1);
+ auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
+ return unique->idx_out_type();
+ }
+
loco::DataType visit(const luci::CircleUnpackOut *node) final
{
return loco::dtype_get(node->input());
addread(ArgMin_U8_002)
addread(ArgMin_U8_003)
addread(AveragePool2D_000)
+addread(AveragePool2D_U8_000)
addread(BatchMatMul_000)
addread(BatchMatMulV2_000)
addread(BatchMatMulV2_001)
addread(Concatenation_000)
addread(Concatenation_U8_000)
addread(Conv2D_000)
+addread(Conv2D_001)
addread(Conv2D_002)
addread(Conv2D_003)
addread(Conv2D_U8_000)
+addread(Conv2D_U8_001)
addread(Cos_000)
addread(DepthToSpace_000)
addread(DepthwiseConv2D_000)
addread(DepthwiseConv2D_U8_000)
+addread(DepthwiseConv2D_U8_001)
addread(DepthwiseConv2D_001)
addread(Div_000)
addread(ELU_000)
addread(If_000)
addread(If_001)
addread(L2Normalize_000)
+addread(L2Normalize_U8_000)
addread(L2Pool2D_000)
addread(L2Pool2D_U8_000)
addread(LeakyRelu_000)
addread(LogicalNot_000)
addread(LogicalOr_000)
addread(Logistic_000)
+addread(Logistic_U8_000)
addread(LogSoftmax_000)
addread(MatMul_000)
addread(MatrixDiag_000)
addread(MaxPool2D_U8_000)
addread(Mean_000)
addread(Mean_001)
+addread(Mean_U8_000)
addread(Minimum_000)
addread(MirrorPad_000)
addread(Mul_000)
addread(Pack_000)
addread(Pack_U8_000)
addread(Pad_000)
+addread(Pad_U8_000)
addread(Pow_000)
addread(PRelu_000)
addread(Range_000)
addwrite(ArgMin_U8_002)
addwrite(ArgMin_U8_003)
addwrite(AveragePool2D_000)
+addwrite(AveragePool2D_U8_000)
addwrite(BatchMatMul_000)
addwrite(BatchMatMulV2_000)
addwrite(BatchMatMulV2_001)
addwrite(Concatenation_000)
addwrite(Concatenation_U8_000)
addwrite(Conv2D_000)
+addwrite(Conv2D_001)
addwrite(Conv2D_002)
addwrite(Conv2D_003)
addwrite(Conv2D_U8_000)
+addwrite(Conv2D_U8_001)
addwrite(Cos_000)
addwrite(DepthToSpace_000)
addwrite(DepthwiseConv2D_000)
addwrite(DepthwiseConv2D_U8_000)
+addwrite(DepthwiseConv2D_U8_001)
addwrite(DepthwiseConv2D_001)
addwrite(Div_000)
addwrite(ELU_000)
addwrite(If_000)
addwrite(If_001)
addwrite(L2Normalize_000)
+addwrite(L2Normalize_U8_000)
addwrite(L2Pool2D_000)
addwrite(L2Pool2D_U8_000)
addwrite(LeakyRelu_000)
addwrite(LogicalNot_000)
addwrite(LogicalOr_000)
addwrite(Logistic_000)
+addwrite(Logistic_U8_000)
addwrite(LogSoftmax_000)
addwrite(MatMul_000)
addwrite(MatrixDiag_000)
addwrite(MaxPool2D_U8_000)
addwrite(Mean_000)
addwrite(Mean_001)
+addwrite(Mean_U8_000)
addwrite(Minimum_000)
addwrite(MirrorPad_000)
addwrite(Mul_000)
return()
endif(NOT FlatBuffers_FOUND)
-# TODO recover official release version
-# NOTE we cannot use version number like "2.3.0-rc0" for find_package()
-# use TensorFlowSource-2.3.0-rc0 as config itself
-# nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
-nnas_find_package(TensorFlowSource-2.3.0-rc0 QUIET)
+nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
if(NOT TensorFlowSource_FOUND)
return()
set(ONE_COMMAND_FILES
one-import
+ one-import-bcq
one-import-tf
one-import-tflite
one-optimize
install(FILES ${ONE_COMMAND}
PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
- GROUP_READ GROUP_WRITE GROUP_EXECUTE
+ GROUP_READ GROUP_EXECUTE
WORLD_READ WORLD_EXECUTE
DESTINATION bin)
About
-----
-Last update: 2020-07-14
+Last update: 2020-08-03
This document explains about 'one-prepare-venv' command.
'one-prepare-venv' will prepare python3 virtual environment with tensorflow-cpu
-version 2.3.0rc0, recommanded 2.x version as of now, so that 'one-import-tf'
+version 2.3.0, the recommended 2.x version as of now, so that 'one-import-tf'
command can execute properly.
About
-----
-Last update: 2020-07-14
+Last update: 2020-07-31
This document briefly explains how to use one-* commands.
Detailed options are not explained here. Run the command to see options.
Currently supported frameworks are 'tf', 'tflite' for TensorFlow and TensorFlow
lite.
+one-import-bcq
+--------------
+
+This will convert a TensorFlow model file (.pb) to a circle model file with BCQ applied.
+To execute this command, the original TensorFlow model file must include BCQ information.
+
+This command invokes the following scripts internally.
+- preserve_bcq_info : Prevent the BCQ information from vanishing
+- generate_bcq_output_arrays : Designate BCQ information nodes as model outputs automatically
+- tf2tfliteV2 : Convert the TensorFlow model to a TensorFlow Lite model
+- tflite2circle : Convert the TensorFlow Lite model to a circle model
+
+When this command finishes, the BCQ information nodes will have been removed, provided
+the BCQ information was valid and BCQ was applied correctly without any errors.
+
+As tf2tfliteV2.py runs the TensorFlow Lite converter, you need to have TensorFlow
+installed on your system. We recommend using 2.3.0 for now.
+
+We provide a Python virtual environment, and one-import-bcq will enter and leave
+this environment so that you don't need to explicitly 'activate' the virtual
+environment.
+
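+For example, assuming a frozen TensorFlow model saved as 'bcq_model.pb' (the file
+and array names here are only illustrative), a typical invocation looks like this:
+
+  one-import-bcq --input_path bcq_model.pb --output_path bcq_model.circle \
+    --input_arrays input --output_arrays output
+
+The conversion log is written next to the output file, in this case as
+'bcq_model.circle.log'.
+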
one-import-tf
-------------
converter to convert tflite model to circle model.
As tf2tfliteV2.py runs TensorFlow lite converter, you need to have TensorFlow
-installed in your system. We recommand to use 2.3.0rc0 for now.
+installed on your system. We recommend using 2.3.0 for now.
We provide python virtual environment and one-import-tf will enter and leave
this environment so that you don't need to explictly 'activate' virtual
function Usage()
{
- echo "Usage: $0 [BACKEND] ..."
+ echo "Usage: one-codegen [BACKEND] ..."
echo "Available BACKEND drivers:"
backend_exist=0
for file in `find $DRIVER_PATH -name *-compile -type f`;
if [ $backend_exist == 0 ]; then
echo " (There is no available backend drivers)"
fi
+
+ exit 255
}
-# Get command from command-line
-BACKEND=$1; shift
-BACKEND_DRIVER="$BACKEND-compile"
+function version()
+{
+ $DRIVER_PATH/one-version one-codegen
+ exit 255
+}
-if [[ -z "${BACKEND_DRIVER}" ]]; then
+# Get command from command-line
+BACKEND=$1
+if [[ -z ${BACKEND} ]]; then
Usage
- exit 255
fi
+shift
+
+if [[ "${BACKEND}" == "--version" ]]; then
+ version
+fi
+
+BACKEND_DRIVER="${BACKEND}-compile"
BACKEND_DRIVER_CMD="${DRIVER_PATH}/${BACKEND_DRIVER}"
if [[ ! -f "${BACKEND_DRIVER_CMD}" ]]; then
echo "ERROR: '${BACKEND_DRIVER}' is not supported"
Usage
- exit 255
fi
"${BACKEND_DRIVER_CMD}" "$@"
function Usage()
{
- echo "Usage: $0 [FRAMEWORK] ..."
+ echo "Usage: one-import [FRAMEWORK] ..."
echo "Available FRAMEWORK drivers:"
framework_exist=0
for file in "$DRIVER_PATH"/one-import-*;
if [ $framework_exist == 0 ]; then
echo " (There is no available import drivers)"
fi
+
+ exit 255
}
-# Get command from command-line
-FRAMEWORK=$1; shift
-FRAMEWORK_DRIVER="one-import-$FRAMEWORK"
+function version()
+{
+ $DRIVER_PATH/one-version one-import
+ exit 255
+}
-if [[ -z "${FRAMEWORK_DRIVER}" ]]; then
+# Get command from command-line
+FRAMEWORK=$1
+if [[ -z ${FRAMEWORK} ]]; then
Usage
- exit 255
+fi
+shift
+
+if [ ${FRAMEWORK} = "--version" ]; then
+ version
fi
+FRAMEWORK_DRIVER="one-import-$FRAMEWORK"
+
FRAMEWORK_DRIVER_CMD="${DRIVER_PATH}/${FRAMEWORK_DRIVER}"
if [[ ! -f "${FRAMEWORK_DRIVER_CMD}" ]]; then
echo "ERROR: '${FRAMEWORK_DRIVER}' is not supported"
Usage
- exit 255
fi
"${FRAMEWORK_DRIVER_CMD}" "$@"
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+usage()
+{
+ echo "Convert TensorFlow model with BCQ to circle."
+ echo "Usage: one-import-bcq"
+ echo " --version Show version information and exit"
+ echo " --input_path <path/to/tfmodel/with/BCQ>"
+ echo " --output_path <path/to/circle>"
+ echo " --input_arrays <names of the input arrays, comma-separated>"
+ echo " --input_shapes <input shapes, colon-separated>"
+ echo " --output_arrays <names of the output arrays, comma-separated>"
+ echo " --v2 Use TensorFlow 2.x interface (default is 1.x interface)"
+ exit 255
+}
+
+version()
+{
+ $DRIVER_PATH/one-version one-import-bcq
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--version')
+ version
+ ;;
+ '--input_path')
+ export INPUT_PATH="$2"
+ shift 2
+ ;;
+ '--output_path')
+ export OUTPUT_PATH="$2"
+ shift 2
+ ;;
+ '--input_arrays')
+ export INPUT_ARRAYS="$2"
+ shift 2
+ ;;
+ '--input_shapes')
+ export INPUT_SHAPES="$2"
+ shift 2
+ ;;
+ '--output_arrays')
+ export OUTPUT_ARRAYS="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "Unknown parameter: ${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
+ echo "Error: input model not found"
+ echo ""
+ usage
+fi
+
+FILE_BASE=$(basename ${OUTPUT_PATH})
+MODEL_NAME="${FILE_BASE%.*}"
+
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${DRIVER_PATH}/venv/bin/activate"
+VIRTUALENV_WINDOWS="${DRIVER_PATH}/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# remove previous log
+rm -rf "${OUTPUT_PATH}.log"
+
+# generate temporary preserved pb file
+echo "${DRIVER_PATH}/preserve_bcq_info" --input_path ${INPUT_PATH} \
+--output_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" > "${OUTPUT_PATH}.log"
+echo " " >> "${OUTPUT_PATH}.log"
+
+"${DRIVER_PATH}/preserve_bcq_info" --input_path ${INPUT_PATH} \
+--output_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" >> "${OUTPUT_PATH}.log" 2>&1
+
+# generate output_arrays automatically
+echo "${DRIVER_PATH}/generate_bcq_output_arrays" \
+--input_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" \
+--output_path "${TMPDIR}/${MODEL_NAME}_output_arrays.txt" > "${OUTPUT_PATH}.log"
+echo " " >> "${OUTPUT_PATH}.log"
+
+"${DRIVER_PATH}/generate_bcq_output_arrays" \
+--input_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" \
+--output_path "${TMPDIR}/${MODEL_NAME}_output_arrays.txt" >> "${OUTPUT_PATH}.log" 2>&1
+
+# generate temporary tflite file
+CONVERT_SCRIPT="python ${DRIVER_PATH}/tf2tfliteV2.py ${TF_INTERFACE} "
+CONVERT_SCRIPT+="--input_path ${TMPDIR}/${MODEL_NAME}_preserved.pb "
+CONVERT_SCRIPT+="--input_arrays ${INPUT_ARRAYS} "
+CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite "
+CONVERT_SCRIPT+="--output_arrays ${OUTPUT_ARRAYS}$(cat ${TMPDIR}/${MODEL_NAME}_output_arrays.txt) "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} "
+fi
+
+echo ${CONVERT_SCRIPT} >> "${OUTPUT_PATH}.log"
+$CONVERT_SCRIPT >> "${OUTPUT_PATH}.log" 2>&1
+
+# convert .tflite to .circle
+echo " " >> "${OUTPUT_PATH}.log"
+echo "${DRIVER_PATH}/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" \
+"${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log"
+echo " " >> "${OUTPUT_PATH}.log"
+
+"${DRIVER_PATH}/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" \
+"${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log" 2>&1
{
echo "Convert TensorFlow model to circle."
echo "Usage: one-import-tf"
+ echo " --version Show version information and exit"
echo " --input_path <path/to/tfmodel>"
echo " --output_path <path/to/circle>"
echo " --input_arrays <names of the input arrays, comma-separated>"
echo " --input_shapes <input shapes, colon-separated>"
echo " --output_arrays <names of the output arrays, comma-separated>"
- exit 0
+ echo " --v2 Use TensorFlow 2.x interface (default is 1.x interface)"
+ exit 255
}
+version()
+{
+ $DRIVER_PATH/one-version one-import-tf
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
# Parse command-line arguments
#
while [ "$#" -ne 0 ]; do
'--help')
usage
;;
+ '--version')
+ version
+ ;;
'--input_path')
export INPUT_PATH="$2"
shift 2
export OUTPUT_ARRAYS="$2"
shift 2
;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
*)
echo "Unknown parameter: ${CUR}"
shift
# remove previous log
rm -rf "${OUTPUT_PATH}.log"
+show_err_onexit()
+{
+ cat "${OUTPUT_PATH}.log"
+}
+
+trap show_err_onexit ERR
+
# generate temporary tflite file
-echo "python" "${DRIVER_PATH}/tf2tfliteV2.py" --v2 --input_path ${INPUT_PATH} \
+echo "python" "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
--input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
--output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
--output_arrays ${OUTPUT_ARRAYS} > "${OUTPUT_PATH}.log"
echo " " >> "${OUTPUT_PATH}.log"
-python "${DRIVER_PATH}/tf2tfliteV2.py" --v2 --input_path ${INPUT_PATH} \
+python "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
--input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
--output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
--output_arrays ${OUTPUT_ARRAYS} >> "${OUTPUT_PATH}.log" 2>&1
{
echo "Convert TensorFlow lite model to circle."
echo "Usage: one-import-tflite"
+ echo " --version Show version information and exit"
echo " --input_path <path/to/tflitemodel>"
echo " --output_path <path/to/circle>"
- exit 0
+ exit 255
+}
+
+version()
+{
+ $DRIVER_PATH/one-version one-import-tflite
+ exit 255
}
# Parse command-line arguments
'--help')
usage
;;
+ '--version')
+ version
+ ;;
'--input_path')
export INPUT_PATH="$2"
shift 2
echo "Error: input model not found"
echo ""
usage
- exit 2
fi
# remove previous log
rm -rf "${OUTPUT_PATH}.log"
+show_err_onexit()
+{
+ cat "${OUTPUT_PATH}.log"
+}
+
+trap show_err_onexit ERR
+
# convert .tflite to .circle
echo "${DRIVER_PATH}/tflite2circle" "${INPUT_PATH}" "${OUTPUT_PATH}" > "${OUTPUT_PATH}.log"
{
echo "Optimize circle model."
echo "Usage: one-optimize"
+ echo " --version Show version information and exit"
echo " --all Enable all optimization algorithms"
echo " --fuse_bcq Enable FuseBCQ Pass"
echo " --fuse_instnorm Enable FuseInstanceNormalization Pass"
echo " Enable ResolveCustomOpMatMulPass Pass"
echo " --input_path <path/to/input/circle>"
echo " --output_path <path/to/output/circle>"
- exit 0
+ exit 255
+}
+
+version()
+{
+ $DRIVER_PATH/one-version one-optimize
+ exit 255
}
OPTIMIZE_all=0
'--help')
usage
;;
+ '--version')
+ version
+ ;;
'--all')
OPTIMIZE_all=1
shift
echo "Error: input model not found"
echo ""
usage
- exit 2
fi
OPTIMIZE_OPTIONS=""
# remove previous log
rm -rf "${OUTPUT_PATH}.log"
+show_err_onexit()
+{
+ cat "${OUTPUT_PATH}.log"
+}
+
+trap show_err_onexit ERR
+
# NOTE do not wrap ${OPTIMIZE_OPTIONS} with ""
# optimize circle
echo "${DRIVER_PATH}/circle2circle" ${OPTIMIZE_OPTIONS} \
{
echo "Package circle to nnpkg"
echo "Usage: one-pack"
+ echo " -v, --version Show version information and exit"
echo " -i <path/to/circle>"
echo " -o <path/to/nnpackage/folder>"
- exit 0
+ exit 255
+}
+
+version()
+{
+ $DRIVER_PATH/one-version one-pack
+ exit 255
}
# Parse command-line arguments
'--help')
usage
;;
+ '-v')
+ version
+ ;;
+ '--version')
+ version
+ ;;
'-i')
export INPUT_PATH="$2"
shift 2
echo "Error: input model not found"
echo ""
usage
- exit 2
fi
+INPUT_FILE=$(basename "${INPUT_PATH}")
+LOG_FILE="${INPUT_FILE%.*}.pack.log"
+
# remove previous log
-rm -rf "${OUTPUT_PATH}.log"
+rm -rf "${LOG_FILE}"
+
+show_err_onexit()
+{
+ cat "${LOG_FILE}"
+}
+
+trap show_err_onexit ERR
# Package circle model file to nnpkg
-echo "${DRIVER_PATH}/model2nnpkg.sh" -o "${OUTPUT_PATH}" "${INPUT_PATH}" > "${OUTPUT_PATH}.log"
+echo "${DRIVER_PATH}/model2nnpkg.sh" -o "${OUTPUT_PATH}" "${INPUT_PATH}" > "${LOG_FILE}"
-"${DRIVER_PATH}/model2nnpkg.sh" -o "${OUTPUT_PATH}" "${INPUT_PATH}" >> "${OUTPUT_PATH}.log" 2>&1
+"${DRIVER_PATH}/model2nnpkg.sh" -o "${OUTPUT_PATH}" "${INPUT_PATH}" >> "${LOG_FILE}" 2>&1
fi
# Install prerequisites
-python3 -m pip install -U virtualenv
+python3 -m pip install --user -U virtualenv
+
+function error_no_ensurepip ()
+{
+ echo "ERROR: python3 'ensurepip' module is not found."
+ echo " On ubuntu, try following command:"
+ echo
+ echo " apt install python$(python3 --version | awk '{print $2}' | awk -F. '{print $1"."$2}')-venv"
+ echo
+ echo " You may need root privilege for this."
+ exit 1
+}
+python3 -m ensurepip --version > /dev/null 2>&1 || error_no_ensurepip
# Create python virtual enviornment
python3 -m venv "${DRIVER_PATH}/venv"
python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
install -U pip setuptools
python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install tensorflow-cpu==2.3.0rc0
+ install tensorflow-cpu==2.3.0
{
echo "Quantize circle model."
echo "Usage: one-quantize"
+ echo " --version Show version information and exit"
echo " --input_dtype Input data type (supported: float32, default=float32)"
echo " --quantized_dtype Output quantized data type (supported: uint8, default=uint8)"
- echo " --granularity Quantize granularity (supported: layer, default=layer)"
+ echo " --granularity Quantize granularity (supported: layer, channel, default=layer)"
echo " --min_percentile Minimum percentile (0.0~100.0, default=1.0)"
echo " --max_percentile Maximum percentile (0.0~100.0, default=99.0)"
echo " --mode Record mode (supported: percentile/moving_average, default=percentile)"
echo " --input_path <path/to/input/circle>"
echo " --input_data <path/to/input/data>"
echo " --output_path <path/to/output/circle>"
- exit 0
+ exit 255
+}
+
+version()
+{
+ $DRIVER_PATH/one-version one-quantize
+ exit 255
}
INPUT_DTYPE=float32
'--help')
usage
;;
+ '--version')
+ version
+ ;;
'--input_dtype')
INPUT_DTYPE="$2"
echo "Error: input model not found"
echo ""
usage
- exit 2
fi
if [ -z ${INPUT_DATA} ] || [ ! -e ${INPUT_DATA} ]; then
echo "Error: input data not found"
echo ""
usage
- exit 2
fi
FILE_BASE=$(basename ${OUTPUT_PATH})
# remove previous log
rm -rf "${OUTPUT_PATH}.log"
+show_err_onexit()
+{
+ cat "${OUTPUT_PATH}.log"
+}
+
+trap show_err_onexit ERR
+
# quantize circle
echo "${DRIVER_PATH}/circle-quantizer" \
--quantize_dequantize_weights ${INPUT_DTYPE} ${QUANTIZED_DTYPE} ${GRANULARITY} \
require("circle2circle")
require("circle-quantizer")
require("record-minmax")
+require("vconone")
+require("bcq-tools")
${QUANTIZATION_VALUE_TEST_WITH_PARAM}
)
-#add_test(
-# NAME pota_record_minmax_test
-# COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_record_minmax.sh"
-# "${TEST_CONFIG}"
-# "${ARTIFACTS_BIN_PATH}"
-# ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
-#)
+add_test(
+ NAME pota_record_minmax_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_record_minmax.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
+)
-#add_test(
-# NAME pota_quantization_test
-# COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_quantization.sh"
-# "${TEST_CONFIG}"
-# "${ARTIFACTS_BIN_PATH}"
-# ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
-#)
+add_test(
+ NAME pota_quantization_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_quantization.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
+)
-#set_tests_properties(pota_record_minmax_test PROPERTIES DEPENDS pota_fake_wquant_test)
-#set_tests_properties(pota_quantization_test PROPERTIES DEPENDS pota_record_minmax_test)
+set_tests_properties(pota_record_minmax_test PROPERTIES DEPENDS pota_fake_wquant_test)
+set_tests_properties(pota_quantization_test PROPERTIES DEPENDS pota_record_minmax_test)
if key == "weights":
expected_weights = np.array(json_load["weights"])
input_weights = tensor["weights"][:]
- if np.allclose(input_weights, expected_weights, rtol=0, atol=0) == False:
+ if np.allclose(input_weights, expected_weights, rtol=0, atol=1) == False:
print("Quantized weights of " + tensor_name + " (" + str(input_weights) +
") do not match with expected value (" + str(expected_weights) +
").")
expected_zero_point = np.array(json_load["zero_point"])
input_zero_point = tensor["zero_point"][:]
if np.allclose(
- input_zero_point, expected_zero_point, rtol=0, atol=0) == False:
+ input_zero_point, expected_zero_point, rtol=0, atol=1) == False:
print("Quantized zero_point of " + tensor_name + " (" +
str(input_zero_point) + ") do not match with expected value (" +
str(expected_zero_point) + ").")
[
[
[
- 1.003921627998352,
- 2.007843255996704
- ],
+ 1.0039215087890625,
+ 2.007843017578125
+ ],
[
- -3.0117647647857666,
+ -3.0117650032043457,
-4.015686511993408
]
- ],
+ ],
[
[
- -5.019608020782471,
- 6.023529529571533
- ],
+ -5.019608497619629,
+ 6.023530006408691
+ ],
[
- -7.027451038360596,
- 7.968627452850342
+ -7.027451515197754,
+ 7.9686279296875
]
]
- ],
+ ],
[
[
[
- 4.015686511993408,
- -2.007843255996704
- ],
+ 4.01568603515625,
+ -2.007843494415283
+ ],
[
- 3.0117647647857666,
- -1.003921627998352
+ 3.0117645263671875,
+ -1.0039215087890625
]
- ],
+ ],
[
[
- -7.968627452850342,
- -6.023529529571533
- ],
+ -7.9686279296875,
+ -6.023530006408691
+ ],
[
- 7.027451038360596,
- 5.019608020782471
+ 7.027451515197754,
+ 5.019608497619629
]
]
]
- {
- "scale": 0.0059054209919261825,
- "weights": [
- 169.0,
- 339.0
- ]
- }
+{
+ "weights": [
+ 4069,
+ 8138
+ ],
+ "scale": 0.0002457468386200985
+}
{
- "scale": 0.09411764705882353,
+ "scale": 0.003916590008884668,
"zero_point": 0.0
}
{
- "max": 7.968627450980392,
- "scale": 0.06274509803921569,
"weights": [
[
[
[
- 144,
- 160
- ],
+ 143,
+ 159
+ ],
[
- 80,
- 64
+ 79,
+ 63
]
- ],
+ ],
[
[
- 48,
- 224
- ],
+ 47,
+ 223
+ ],
[
- 16,
- 255
+ 15,
+ 254
]
]
- ],
+ ],
[
[
[
- 192,
- 96
- ],
+ 191,
+ 95
+ ],
[
- 176,
- 112
+ 175,
+ 111
]
- ],
+ ],
[
[
- 1,
- 32
- ],
+ 0,
+ 31
+ ],
[
- 240,
- 208
+ 239,
+ 207
]
]
]
- ],
- "min": -8.031372549019608,
- "zero_point": 128.0
+ ],
+ "scale": 0.062745101749897,
+ "zero_point": 127.0,
+ "min": -7.9686279296875,
+ "max": 8.031373023986816
}
{
- "scale": 0.17836222929113052,
+ "scale": 0.037479765713214874,
"zero_point": 0.0
}
{
- "max": 24.0,
- "min": 1.0
+ "min": 0.005472412034869194,
+ "max": 0.9987304735183716
}
{
- "max": 45.48236846923828,
- "min": 0.0
+ "min": 0.0,
+ "max": 9.557340850830078
}
[
[
[
- 0.9725490212440491,
- 1.9450980424880981,
- 3.0392158031463623,
+ 0.9725494384765625,
+ 1.945098876953125,
+ 3.039216995239258,
4.0117645263671875
- ],
+ ],
[
- -8.996078491210938,
- 9.968626976013184,
- -10.941176414489746,
- 12.035294532775879
+ -8.996077537536621,
+ 9.9686279296875,
+ -10.94117546081543,
+ 12.035295486450195
]
- ],
+ ],
[
[
- 4.984313488006592,
- 5.956862926483154,
- 7.050980567932129,
- 8.023529052734375
- ],
+ 4.98431396484375,
+ 5.9568634033203125,
+ 7.050981521606445,
+ 8.023530960083008
+ ],
[
- 13.007843017578125,
- -13.980392456054688,
- 14.952940940856934,
+ 13.007843017578125,
+ -13.980391502380371,
+ 14.95294189453125,
-16.04705810546875
]
]
{
- "scale": 0.007627835447904652,
"weights": [
- 131.0,
- 262.0,
- 393.0,
- 524.0
- ]
+ 2156,
+ 4312,
+ 6468,
+ 8624
+ ],
+ "scale": 0.0004638272181067826
}
{
- "scale": 0.06274509803921569,
+ "scale": 0.0038153529167175293,
"zero_point": 0.0
}
{
- "max": 14.952941176470588,
- "scale": 0.12156862745098039,
"weights": [
[
[
[
- 140,
- 148,
- 157,
+ 140,
+ 148,
+ 157,
165
- ],
+ ],
[
- 58,
- 214,
- 42,
+ 58,
+ 214,
+ 42,
231
]
- ],
+ ],
[
[
- 173,
- 181,
- 190,
+ 173,
+ 181,
+ 190,
198
- ],
+ ],
[
- 239,
- 17,
- 255,
+ 239,
+ 17,
+ 255,
0
]
]
]
- ],
- "min": -16.04705882352941,
- "zero_point": 132.0
+ ],
+ "scale": 0.12156862765550613,
+ "zero_point": 132.0,
+ "min": -16.04705810546875,
+ "max": 14.952940940856934
}
{
- "scale": 0.893733185412837,
+ "scale": 0.07362665981054306,
"zero_point": 0.0
}
{
- "max": 16.0,
- "min": 1.0
+ "min": 0.02638142943382263,
+ "max": 0.9729149651527405
}
{
- "max": 227.90196228027344,
- "min": 0.0
+ "min": 0.0,
+ "max": 18.77479721069336
}
--- /dev/null
+{
+ "weights": [
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ]
+ ]
+}
--- /dev/null
+{
+ "weights": [
+ 415,
+ -829,
+ -1244,
+ 1658
+ ],
+ "scale": 0.00241205753304663
+}
--- /dev/null
+{
+ "scale": 0.03844216465950012,
+ "zero_point": 126.0
+}
--- /dev/null
+{
+ "scale": 0.741962730884552,
+ "zero_point": 156.0
+}
--- /dev/null
+{
+ "weights": [
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ]
+ ],
+ "scale": 0.062745101749897,
+ "zero_point": 127.0,
+ "min": -7.9686279296875,
+ "max": 8.031373023986816
+}
--- /dev/null
+{
+ "min": -4.832756385803223,
+ "max": 4.969995346069336
+}
--- /dev/null
+{
+ "min": -115.99438369750976,
+ "max": 73.20612327575684
+}
--- /dev/null
+{
+ "weights": [
+ [
+ [
+ [
+ 0.960784912109375,
+ 2.0588245391845703
+ ],
+ [
+ -3.0196075439453125,
+ -3.980391502380371
+ ],
+ [
+ 4.9411773681640625,
+ -6.039215087890625
+ ]
+ ],
+ [
+ [
+ 7.0,
+ 7.960784912109375
+ ],
+ [
+ -9.058823585510254,
+ -10.019607543945312
+ ],
+ [
+ 10.980392456054688,
+ -11.941176414489746
+ ]
+ ],
+ [
+ [
+ 13.039216995239258,
+ 14.000001907348633
+ ],
+ [
+ -14.960784912109375,
+ -16.05882453918457
+ ],
+ [
+ 17.019607543945312,
+ -17.980392456054688
+ ]
+ ]
+ ]
+ ]
+}
--- /dev/null
+{
+ "scale": 0.03869570419192314,
+ "zero_point": 126.0
+}
--- /dev/null
+{
+ "weights": [
+ [
+ [
+ [
+ 138,
+ 146
+ ],
+ [
+ 109,
+ 102
+ ],
+ [
+ 167,
+ 87
+ ]
+ ],
+ [
+ [
+ 182,
+ 189
+ ],
+ [
+ 65,
+ 58
+ ],
+ [
+ 211,
+ 44
+ ]
+ ],
+ [
+ [
+ 226,
+ 233
+ ],
+ [
+ 22,
+ 14
+ ],
+ [
+ 255,
+ 0
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.13725490868091583,
+ "zero_point": 131.0,
+ "min": -17.980392456054688,
+ "max": 17.019609451293945
+}
--- /dev/null
+{
+ "scale": 1.6333034038543701,
+ "zero_point": 127.0
+}
--- /dev/null
+{
+ "min": -4.890846576690674,
+ "max": 4.976558513641357
+}
--- /dev/null
+{
+ "min": -207.54233032226563,
+ "max": 208.95002136230468
+}
addTest(Conv2D_004 layer uint8)
addTest(DepthwiseConv2D_002 layer uint8)
+addTest(FullyConnected_003 layer uint8)
+addTest(TransposeConv_001 layer uint8)
-1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
+0.01090685,0.0581577 ,0.637094 ,0.64067715,0.26264507,0.13692169,0.9649414 ,0.5117181 ,0.18012471,0.07855253,0.6358017 ,0.62257963,0.41469443,0.93169045,0.20763828,0.7634293 ,0.75929826,0.72708374,0.23463063,0.58222896,0.6351517 ,0.68781173,0.5558012 ,0.7652179
--- /dev/null
+0.57017624,0.08235867,0.03672464,0.40372616,0.7353964 ,0.59611887,0.7675548 ,0.21004233,0.09803218,0.20009473,0.8821493 ,0.17015271,0.14840214,0.99910176,0.37003204,0.22893582,0.43173164,0.3105084 ,0.41997132,0.43714985,0.08115962,0.71896386,0.7810953 ,0.00524598
--- /dev/null
+0.65292275,0.79842275,0.97853714,0.6711518 ,0.607567 ,0.40971732,0.74838483,0.95853555,0.32158023,0.911524 ,0.66938365,0.8573132 ,0.3047727 ,0.5561248 ,0.914098 ,0.07650814,0.37868017,0.29269257,0.19652605,0.63025194,0.61496884,0.32011527,0.8204132 ,0.21866946
--- /dev/null
+0.4548901 ,0.56957537,0.0252368 ,0.4884317 ,0.7516498 ,0.02631272,0.22107519,0.95249426,0.34902394,0.11520014,0.808911 ,0.4148615 ,0.63615656,0.84020686,0.3633697 ,0.23993976,0.54176176,0.86938345,0.81628686,0.6380988 ,0.91891205,0.0406627 ,0.90289026,0.9429013
--- /dev/null
+0.9309136 ,0.02123719,0.64467335,0.6910113 ,0.47402772,0.54622203,0.31527275,0.81530565,0.98981965,0.36102158,0.03114039,0.1902339 ,0.45183742,0.60178596,0.4683102 ,0.59810966,0.40558222,0.5420302 ,0.72699505,0.9575108 ,0.46746576,0.08518691,0.40302262,0.69213694
-1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12, 13, 14, 15, 16
+0.31365377,0.6127105 ,0.7047126 ,0.2511918 ,0.16652136,0.36075932,0.44332707,0.77615815,0.60456425,0.26207635,0.28714025,0.11579613,0.89698446,0.67223394,0.3757766 ,0.11787009
--- /dev/null
+0.9409595 ,0.3991174 ,0.43546647,0.221152 ,0.7794665 ,0.8619514 ,0.5903087 ,0.24476172,0.5932698 ,0.2727837 ,0.3980262 ,0.13329633,0.4319272 ,0.37872055,0.1721639 ,0.92437047
--- /dev/null
+0.6484028 ,0.09222967,0.76285905,0.02265582,0.2564394 ,0.11219095,0.22529566,0.09101159,0.15937322,0.3540595 ,0.25971088,0.4681136 ,0.4279646 ,0.5386553 ,0.11397707,0.7413688
--- /dev/null
+0.9182678 ,0.8253187 ,0.6572848 ,0.46436486,0.45208713,0.42112917,0.24383743,0.16039051,0.24649048,0.63431305,0.31141657,0.25664324,0.721266 ,0.18996912,0.35422477,0.8826148
--- /dev/null
+0.97424644,0.9360494 ,0.6849295 ,0.21313633,0.23943195,0.32497332,0.5091704 ,0.67543274,0.49667478,0.73460567,0.5866559 ,0.5312464 ,0.8252662 ,0.36093768,0.7143621 ,0.7234413
--- /dev/null
+ 2.7731526 , 2.451602 , 3.7535272 ,-1.2774152 , 1.5482912 , 1.3402948 , 4.4792123 ,-4.4954367 , 3.354679 ,-3.3615496 ,-4.619757 ,-3.3659618 , 4.7626247 ,-1.3596478 ,-4.835548 , 0.78964525
--- /dev/null
+ 0.5400839 ,-3.2621996 ,-3.4817135 , 3.8183312 , 0.48498327, 2.9812584 , 4.111276 , 0.11223658, 4.7201405 , 2.4256718 , 1.4895477 , 4.7596602 ,-0.32709372, 1.3507305 ,-0.30043927,-1.8077502
--- /dev/null
+ 3.8758078 , 4.978636 ,-0.22925885,-2.6760504 ,-1.9160627 ,-4.609644 ,-0.9515802 , 3.558274 , 2.9096057 , 0.3340422 , 0.38608226,-0.32168412, 4.688853 ,-4.583811 ,-2.5113506 ,-4.6688786
--- /dev/null
+-2.9868221 , 2.4237797 , 1.0833962 ,-0.9231426 ,-2.1091506 ,-2.6163697 ,-0.23101932,-1.9252896 , 4.7034135 , 3.1088963 ,-2.345823 ,-2.7866168 ,-3.186763 ,-4.431844 , 3.3113294 , 0.9501982
--- /dev/null
+ 3.9716747 ,-2.254871 , 1.1943274 ,-2.212602 , 3.4311683 , 1.114989 , 4.0739036 , 0.47244295,-3.5793104 ,-3.359908 ,-4.7657595 , 2.0369127 ,-2.5619278 ,-3.4452975 ,-4.5852203 ,-1.137643
--- /dev/null
+-1.4124781 , 0.42694193, 1.1734594 ,-3.5111153 ,-2.9756174 , 1.3682148 ,-2.318465 , 2.198896 ,-4.5043235 , 3.1775594 ,-0.42802384,-1.4872279 , 1.3821319 ,-4.771963 ,-0.12837897, 4.132799 , 3.697655 , 2.0807178 ,-3.621293 , 2.121878 ,-0.25654107, 0.42100102,-1.4009671 ,-2.9733627 ,-0.7058871 ,-2.831215 , 3.5669627 , 2.1420689 ,-1.8789555 , 0.8104939 ,-2.0503597 , 1.7788508
--- /dev/null
+ 3.4726453 , 3.0497985 ,-4.234619 ,-1.0526706 , 1.7278554 ,-3.341614 , 4.54768 , 3.0954597 ,-3.735109 , 2.8810751 ,-2.5381427 ,-3.2360535 ,-1.5378917 , 2.3052745 ,-3.170938 ,-3.327242 , 2.0654576 ,-2.2294598 ,-1.881382 , 0.13216451,-4.2825613 , 0.26616526, 4.6196365 ,-0.88623226, 1.7103885 ,-1.5865034 ,-3.9114466 ,-3.2227128 , 4.909618 , 2.3318915 , 0.84300846, 0.760918
--- /dev/null
+-4.6097918,-4.21991 ,-3.9955974, 3.6492047, 2.9191775, 2.8082933, 1.6189331, 0.2730309,-1.5029653,-1.9471445, 4.8758197, 3.3177438, 3.1338058,-2.1281245,-1.7526287,-2.5518703,-1.7746793, 4.0455256,-0.5839861,-4.408046 ,-4.0034447, 1.5858272,-4.5896654, 4.7211285,-4.677515 ,-2.6027086,-4.7896166,-3.5512326,-1.9068764,-2.9705904,-4.854087 ,-4.892111
--- /dev/null
+ 2.1514777e-02, 2.6526773e+00,-3.0477784e+00, 1.3287724e+00,-4.1414630e-01,-1.7295350e-01, 7.6649576e-01,-1.8028022e+00,-7.0781744e-01,-2.5262204e-01,-3.0970418e+00,-1.3165286e+00,-4.6649928e+00, 2.0809033e+00,-1.5739973e+00,-4.0531826e-01,-2.1718202e+00, 2.0146034e+00, 2.5044403e+00,-1.1256610e+00, 1.3536702e+00, 1.0283234e-03,-1.8823910e+00, 4.7122188e+00, 9.4781297e-01, 3.2012525e+00,-5.5164534e-01,-2.6158772e+00,-1.8771547e+00,-3.1689723e+00, 4.9054880e+00,-3.4560370e+00
--- /dev/null
+-2.0927553 ,-2.107511 ,-1.6963564 , 1.7006218 , 1.4575784 , 0.06095728, 1.2659966 , 4.1905265 , 1.3035946 , 4.9793477 ,-4.3388166 ,-0.23496658, 1.9831208 , 2.6154642 ,-0.2790228 ,-3.1774354 ,-3.178935 ,-1.1564373 ,-0.8199472 ,-2.245698 ,-4.8605046 ,-3.569018 ,-1.4226891 ,-4.1067843 , 2.6078918 ,-3.5830674 , 1.9065963 , 2.435578 ,-3.3216476 , 4.5930347 , 2.9191844 , 1.7885648
# Run record-minmax
"${RECORD_MINMAX_PATH}" \
- "${TEST_RESULT_FILE}.fake_quantized.circle" \
- "${TEST_RESULT_FILE}.input.h5" \
- "${TEST_RESULT_FILE}.minmax_recorded.circle"
+ --input_model "${TEST_RESULT_FILE}.fake_quantized.circle" \
+ --input_data "${TESTCASE_FILE}.input.h5" \
+ --output_model "${TEST_RESULT_FILE}.minmax_recorded.circle"
# Dump min/max values (circle-tensordump)
"${CIRCLE_TENSORDUMP_PATH}" \
target_link_libraries(record-minmax luci_import)
target_link_libraries(record-minmax luci_export)
target_link_libraries(record-minmax luci_interpreter)
+target_link_libraries(record-minmax vconone)
install(TARGETS record-minmax DESTINATION bin)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_find_package(GTest REQUIRED)
GTest_AddTest(record_minmax_function_test "${CMAKE_CURRENT_SOURCE_DIR}/tests/RecordFunction.test.cpp")
target_include_directories(record_minmax_function_test PRIVATE include)
#include "RecordMinMax.h"
#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+void print_version(void)
+{
+ std::cout << "record-minmax version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
int entry(const int argc, char **argv)
{
arser::Arser arser(
"Embedding min/max values of activations to the circle model for post-training quantization");
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
arser.add_argument("--input_model")
.nargs(1)
.type(arser::DataType::STR)
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
auto input_model_path = arser.get<std::string>("--input_model");
require("luci")
require("safemain")
require("arser")
+require("vconone")
#include <string>
#include <cassert>
+#include <stdexcept>
using Shape = luci_interpreter::Shape;
using DataType = luci_interpreter::DataType;
assert(node->opcode() != luci::CircleOpcode::UNPACK);
assert(node->opcode() != luci::CircleOpcode::WHILE);
- if (node->opcode() == luci::CircleOpcode::CONST)
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
{
// node is not activation. Do nothing.
return;
auto node = iter->first;
auto minmax = iter->second;
- float min, max;
+ float min{0.0f}, max{0.0f};
if (mode == "percentile")
{
min = getNthPercentile(minmax.min_vector, min_percentile);
EXPECT_FLOAT_NEAR(0, getNthPercentile(input, 0));
EXPECT_FLOAT_NEAR(9, getNthPercentile(input, 100));
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, Simple)
{
EXPECT_FLOAT_NEAR(0.09 * std::floor(i) + 0.045, getNthPercentile(input, i));
}
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, Float)
EXPECT_FLOAT_NEAR(2.799942346802177, getNthPercentile(input, 1));
EXPECT_FLOAT_NEAR(7.768503955476342, getNthPercentile(input, 3.14));
EXPECT_FLOAT_NEAR(99.40456084968194, getNthPercentile(input, 99));
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, FloatWithNegative)
EXPECT_FLOAT_NEAR(-47.20005765319782, getNthPercentile(input, 1));
EXPECT_FLOAT_NEAR(-42.23149604452366, getNthPercentile(input, 3.14));
EXPECT_FLOAT_NEAR(49.40456084968194, getNthPercentile(input, 99));
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, SigleElement)
EXPECT_FLOAT_NEAR(33, getNthPercentile(input, 0));
EXPECT_FLOAT_NEAR(33, getNthPercentile(input, 50));
EXPECT_FLOAT_NEAR(33, getNthPercentile(input, 100));
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, OutOfBoundary_NEG)
EXPECT_THROW(getNthPercentile(input, -1), std::runtime_error);
EXPECT_THROW(getNthPercentile(input, 101), std::runtime_error);
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, EmptyVector_NEG)
std::vector<float> input;
EXPECT_THROW(getNthPercentile(input, 10), std::runtime_error);
+
+ SUCCEED();
}
} // namespace record_minmax
COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_EXPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_export_action>\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_IMPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_import_action>\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'MODEL2NNPKG_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh\"' >> ${TEST_CONFIG}
- COMMAND ${CMAKE_COMMAND} -E echo 'NNPKG_TEST_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tests/scripts/nnpkg_test.sh\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'RUNTIME_LIBRARY_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/Product/out/\"' >> ${TEST_CONFIG}
DEPENDS
nnkit-run
#--------------- Remote Machine Setting ---------------#
set(REMOTE_IP "xxx.xxx.xxx.xxx")
set(REMOTE_USER "remote_username")
-
+
#--------------------- Tests list ---------------------#
add(UNIT_Add_000)
add(UNIT_Add_001)
...
```
- - If any Tensorflow model is added, or if `REMOTE_IP` and `REMOTE_USER` is not given, `tf2circle-value-pbtxt-remote-test` will not be created.
+ - If any TensorFlow model is added, or if `REMOTE_IP` and `REMOTE_USER` are not given, `tf2circle-value-pbtxt-remote-test` will not be created.
1. (Optional) ssh authentication
- This test uses the `ssh` and `scp` commands, and those commands ask for the remote machine's password every time they are called. This means you would have to type the password on every `ssh` and `scp` invocation.
- This test avoids that by using `ssh-copy-id`, which copies the host machine's public key into `authorized_keys` on the remote machine. Because of that, the test asks for the remote machine's password only once, the first time. This is the only user interaction while running this test.
├ Result_latest -> Result_YYMMDD_hhmmss.csv
├ Result_YYMMDD_hhmmss.csv
├ ...
- |
+ |
├ UNIT_Add_000
| ├ metadata
| | ├ MANIFEST
|
├ ...
```
-- `nnpkg_test.sh`, runtime products and each nnpackage are sent to `REMOTE_WORKDIR` in remote machine.
+- Runtime products and each nnpackage are sent to `REMOTE_WORKDIR` on the remote machine.
- (TBD) Modify script not to remove obtained h5 file.
```
REMOTE_WORKDIR
- ├ nnpkg_test.sh
|
├ Product
| └ out
| ├ bin
| ├ lib
+ | ├ test
| ├ ...
|
├ UNIT_Add_000
echo "-- Found TF backend: ${TF_BACKEND_PATH}"
echo "-- Found TF2CIRCLE: ${TF2CIRCLE_PATH}"
echo "-- Found MODEL2NNPKG: ${MODEL2NNPKG_PATH}"
-echo "-- Found nnpkg_test: ${NNPKG_TEST_PATH}"
echo "-- Found Runtime library: ${RUNTIME_LIBRARY_PATH}"
echo "-- Found randomize action: ${RANDOMIZE_ACTION_PATH}"
echo "-- Found HDF5 export action: ${HDF5_EXPORT_ACTION_PATH}"
exit 3
fi
-if [ -z ${NNPKG_TEST_PATH} ] || [ ! -f ${NNPKG_TEST_PATH} ]; then
- echo "nnpkg_test is not found"
- exit 4
-fi
-
# Register remote machine ssh information
cat /dev/zero | ssh-keygen -q -N ""
ssh-copy-id -o ConnectTimeout=5 "${REMOTE_USER}@${REMOTE_IP}"
ssh "${REMOTE_USER}@${REMOTE_IP}" "mkdir -p ${REMOTE_WORKDIR}/Product/"
scp -r "${RUNTIME_LIBRARY_PATH}" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/Product/"
-# Send nnpkg_test.sh
-scp "${NNPKG_TEST_PATH}" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/"
-
TESTED=()
PASSED=()
FAILED=()
# Run test_arm_nnpkg in remote machine
scp -r "${WORKDIR}/${PREFIX}/" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/${PREFIX}/"
- ssh "${REMOTE_USER}@${REMOTE_IP}" "cd ${REMOTE_WORKDIR}; ./nnpkg_test.sh -i . -o ${PREFIX}/metadata/tc ${PREFIX}"
-
+ ssh "${REMOTE_USER}@${REMOTE_IP}" "cd ${REMOTE_WORKDIR}; ./Product/out/test/onert-test nnpkg-test -i . -o ${PREFIX}/metadata/tc ${PREFIX}"
+
if [[ $? -eq 0 ]]; then
touch "${PASSED_TAG}"
fi
get_target_property(ARTIFACTS_SRC_PATH testDataGenerator SOURCE_DIR)
-# In this test, only the runtime test is performed because the test from tf to
-# nnpackage is done in common-artifacts, and for this runtime test, generation of
+# In this test, only the runtime test is performed because the test from tf to
+# nnpackage is done in common-artifacts, and for this runtime test, generation of
# test data is required. And, tcgenerate in ${ARTIFACTS_SRC_PATH}/exclude.lst
# means it won't generate test data, which is why the "tcgenerate" macro below excludes
-# specific opearators from runtime test.
-# Also, since circlize and optimize macro included in `exclude.lst` file is only
+# specific operators from runtime test.
+# Also, since the circlize and optimize macros included in the `exclude.lst` file are only
# needed in common-artifacts, they have no function here.
macro(circlize)
endmacro()
add_custom_command(
OUTPUT ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
- COMMAND ${CMAKE_COMMAND} -E echo 'NNPKG_TEST_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tests/scripts/nnpkg_test.sh\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'RUNTIME_LIBRARY_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/Product/out/\"' >> ${TEST_CONFIG}
COMMENT "Generate test configuration"
)
set(REMOTE_IP "xxx.xxx.xxx.xxx")
set(REMOTE_USER "remote_username")
```
- - If any recipe is added, or if `REMOTE_IP` and `REMOTE_USER` is not given, `tf2nnpackage-value-remote-test` will not be created.
+ - If any recipe is added, or if `REMOTE_IP` and `REMOTE_USER` are not given, `tf2nnpackage-value-remote-test` will not be created.
1. (Optional) ssh authentication
- This test uses the `ssh` and `scp` commands, and those commands ask for the remote machine's password every time they are called. This means you would have to type the password on every `ssh` and `scp` invocation.
- This test avoids that by using `ssh-copy-id`, which copies the host machine's public key into `authorized_keys` on the remote machine. Because of that, the test asks for the remote machine's password only once, the first time. This is the only user interaction while running this test.
### Generated Files While Running
- All related files (`pb`, `circle`, `h5`, etc.) are taken from the `build/compiler/common-artifacts` folder.
-- `nnpkg_test.sh`, runtime products and each nnpackage are sent to `REMOTE_WORKDIR` in remote machine.
+- Runtime products and each nnpackage are sent to `REMOTE_WORKDIR` on the remote machine.
- Each test result is generated in `build/compiler/common-artifacts` with the name `${RECIPE}.log`
### Check Test Result
source "${CONFIG_PATH}"
-echo "-- Found nnpkg_test: ${NNPKG_TEST_PATH}"
echo "-- Found Runtime library: ${RUNTIME_LIBRARY_PATH}"
echo "-- Found workdir: ${WORKDIR}"
-if [ -z ${NNPKG_TEST_PATH} ] || [ ! -f ${NNPKG_TEST_PATH} ]; then
- echo "nnpkg_test is not found"
- exit 4
-fi
-
# Register remote machine ssh information
cat /dev/zero | ssh-keygen -q -N ""
ssh-copy-id -o ConnectTimeout=5 "${REMOTE_USER}@${REMOTE_IP}"
ssh "${REMOTE_USER}@${REMOTE_IP}" "mkdir -p ${REMOTE_WORKDIR}/Product/"
scp -r "${RUNTIME_LIBRARY_PATH}" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/Product/"
-# Send nnpkg_test.sh
-scp "${NNPKG_TEST_PATH}" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/"
-
TESTED=()
PASSED=()
FAILED=()
PREFIX=${PREFIX}.opt ;
fi
scp -r "${PREFIX}/" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/${PREFIX}/"
- ssh "${REMOTE_USER}@${REMOTE_IP}" "cd ${REMOTE_WORKDIR}; ./nnpkg_test.sh ${PREFIX}"
-
+ ssh "${REMOTE_USER}@${REMOTE_IP}" "cd ${REMOTE_WORKDIR}; ./Product/out/test/onert-test nnpkg-test ${PREFIX}"
+
if [[ $? -eq 0 ]]; then
touch "${BINDIR}/${PASSED_TAG}"
fi
-h, --help show this help message and exit
--v1 Use TensorFlow Lite Converter 1.x
--v2 Use TensorFlow Lite Converter 2.x
+ --graph_def Use graph def file(default)
+ --saved_model Use saved model
+ --keras_model Use keras model
-i INPUT_PATH, --input_path INPUT_PATH
Full filepath of the input file.
-o OUTPUT_PATH, --output_path OUTPUT_PATH
Names of the input arrays, comma-separated.
-s INPUT_SHAPES, --input_shapes INPUT_SHAPES
Shapes corresponding to --input_arrays, colon-
- separated.
+ separated.(ex:"1,4,4,3:1,20,20,3")
-O OUTPUT_ARRAYS, --output_arrays OUTPUT_ARRAYS
Names of the output arrays, comma-separated.
+
```
-#!/usr/bin/env python
+#!/usr/bin/env python3
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
# Copyright (C) 2018 The TensorFlow Authors
converter_version.add_argument(
"--v2", action="store_true", help="Use TensorFlow Lite Converter 2.x")
+ # Input model format
+ model_format_arg = parser.add_mutually_exclusive_group()
+ model_format_arg.add_argument(
+ "--graph_def",
+ action="store_const",
+ dest="model_format",
+ const="graph_def",
+ help="Use graph def file(default)")
+ model_format_arg.add_argument(
+ "--saved_model",
+ action="store_const",
+ dest="model_format",
+ const="saved_model",
+ help="Use saved model")
+ model_format_arg.add_argument(
+ "--keras_model",
+ action="store_const",
+ dest="model_format",
+ const="keras_model",
+ help="Use keras model")
+
# Input and output path.
parser.add_argument(
"-i",
help="Names of the output arrays, comma-separated.",
required=True)
+ # Set default value
+ parser.set_defaults(model_format="graph_def")
return parser
def _v1_convert(flags):
- input_shapes = None
- if flags.input_shapes:
- input_arrays = _parse_array(flags.input_arrays)
- input_shapes_list = [
- _parse_array(shape, type_fn=int) for shape in flags.input_shapes.split(":")
- ]
- input_shapes = dict(list(zip(input_arrays, input_shapes_list)))
-
- converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
- flags.input_path, _parse_array(flags.input_arrays),
- _parse_array(flags.output_arrays), input_shapes)
+ if flags.model_format == "graph_def":
+ input_shapes = None
+ if flags.input_shapes:
+ input_arrays = _parse_array(flags.input_arrays)
+ input_shapes_list = [
+ _parse_array(shape, type_fn=int)
+ for shape in flags.input_shapes.split(":")
+ ]
+ input_shapes = dict(list(zip(input_arrays, input_shapes_list)))
+
+ converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
+ flags.input_path, _parse_array(flags.input_arrays),
+ _parse_array(flags.output_arrays), input_shapes)
+
+ if flags.model_format == "saved_model":
+ converter = tf.compat.v1.lite.TFLiteConverter.from_saved_model(flags.input_path)
+
+ if flags.model_format == "keras_model":
+ converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file(
+ flags.input_path)
converter.allow_custom_ops = True
def _v2_convert(flags):
- file_content = open(flags.input_path, 'rb').read()
- try:
- graph_def = tf.compat.v1.GraphDef()
- graph_def.ParseFromString(file_content)
- except (_text_format.ParseError, DecodeError):
+ if flags.model_format == "graph_def":
+ file_content = open(flags.input_path, 'rb').read()
try:
- _text_format.Merge(file_content, graph_def)
+ graph_def = tf.compat.v1.GraphDef()
+ graph_def.ParseFromString(file_content)
except (_text_format.ParseError, DecodeError):
- raise IOError("Unable to parse input file '{}'.".format(flags.input_path))
-
- wrap_func = wrap_frozen_graph(
- graph_def,
- inputs=[
- _str + ":0" if len(_str.split(":")) == 1 else _str
- for _str in _parse_array(flags.input_arrays)
- ],
- outputs=[
- _str + ":0" if len(_str.split(":")) == 1 else _str
- for _str in _parse_array(flags.output_arrays)
- ])
- converter = tf.lite.TFLiteConverter.from_concrete_functions([wrap_func])
+ try:
+ _text_format.Merge(file_content, graph_def)
+ except (_text_format.ParseError, DecodeError):
+ raise IOError("Unable to parse input file '{}'.".format(flags.input_path))
+
+ wrap_func = wrap_frozen_graph(
+ graph_def,
+ inputs=[
+ _str + ":0" if len(_str.split(":")) == 1 else _str
+ for _str in _parse_array(flags.input_arrays)
+ ],
+ outputs=[
+ _str + ":0" if len(_str.split(":")) == 1 else _str
+ for _str in _parse_array(flags.output_arrays)
+ ])
+ converter = tf.lite.TFLiteConverter.from_concrete_functions([wrap_func])
+
+ if flags.model_format == "saved_model":
+ converter = tf.lite.TFLiteConverter.from_saved_model(flags.input_path)
+
+ if flags.model_format == "keras_model":
+ keras_model = tf.keras.models.load_model(flags.input_path)
+ converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter.allow_custom_ops = True
converter.experimental_new_converter = True
add_executable(tfl-verify ${SOURCES})
target_include_directories(tfl-verify PRIVATE src)
+target_link_libraries(tfl-verify arser)
target_link_libraries(tfl-verify foder)
target_link_libraries(tfl-verify mio_tflite)
target_link_libraries(tfl-verify safemain)
+require("arser")
require("foder")
require("mio-tflite")
require("safemain")
#include "VerifyFlatBuffers.h"
+#include <arser/arser.h>
+
#include <iostream>
#include <memory>
#include <string>
int entry(int argc, char **argv)
{
- if (argc != 2)
+ arser::Arser arser;
+ arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file path to verify");
+
+ try
{
- std::cerr << "ERROR: Failed to parse arguments" << std::endl;
- std::cerr << std::endl;
- std::cerr << "USAGE: " << argv[0] << " [tflite]" << std::endl;
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cout << err.what() << std::endl;
+ std::cout << arser;
return 255;
}
+
auto verifier = std::make_unique<VerifyFlatbuffers>();
- std::string model_file = argv[argc - 1];
+ std::string model_file = arser.get<std::string>("tflite");
std::cout << "[ RUN ] Check " << model_file << std::endl;
quant_builder.add_min(quant_min);
quant_builder.add_scale(quant_scale);
quant_builder.add_zero_point(quant_zero_point);
+ quant_builder.add_quantized_dimension(quant.quantized_dimension());
// Update QuantizationParameters Index
quant_index = quant_builder.Finish();
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonMaxSuppressionV4.h"
+
+flatbuffers::Offset<void> NonMaxSuppressionV4Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::NonMaxSuppressionV4OptionsBuilder options_builder{fbb};
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef>
+NonMaxSuppressionV4ChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new NonMaxSuppressionV4Chef{operation}};
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_NON_MAX_SUPPRESSION_V4_H__
+#define __OP_NON_MAX_SUPPRESSION_V4_H__
+
+#include "OpChef.h"
+
+class NonMaxSuppressionV4Chef final : public OpChef
+{
+public:
+ explicit NonMaxSuppressionV4Chef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override
+ {
+ return tflite::BuiltinOperator_NON_MAX_SUPPRESSION_V4;
+ }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_NonMaxSuppressionV4Options;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct NonMaxSuppressionV4ChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_NON_MAX_SUPPRESSION_V4_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PadV2.h"
+
+flatbuffers::Offset<void> PadV2Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::PadV2OptionsBuilder padv2_options_builder{fbb};
+ return padv2_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> PadV2ChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new PadV2Chef{operation}};
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_PADV2_H__
+#define __OP_PADV2_H__
+
+#include "OpChef.h"
+
+class PadV2Chef final : public OpChef
+{
+public:
+ explicit PadV2Chef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_PADV2; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_PadV2Options; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct PadV2ChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_PADV2_H__
OP_CHEF(MirrorPad, MirrorPadChefFactory)
OP_CHEF(Mul, MulChefFactory)
OP_CHEF(Neg, NegChefFactory)
+OP_CHEF(NonMaxSuppressionV4, NonMaxSuppressionV4ChefFactory)
OP_CHEF(NotEqual, NotEqualChefFactory)
OP_CHEF(OneHot, OneHotChefFactory)
OP_CHEF(Pack, PackChefFactory)
OP_CHEF(Pad, PadChefFactory)
+OP_CHEF(PadV2, PadV2ChefFactory)
OP_CHEF(Pow, PowChefFactory)
OP_CHEF(PRelu, PReluChefFactory)
OP_CHEF(Range, RangeChefFactory)
#include "Op/MirrorPad.h"
#include "Op/Mul.h"
#include "Op/Neg.h"
+#include "Op/NonMaxSuppressionV4.h"
#include "Op/NotEqual.h"
#include "Op/OneHot.h"
#include "Op/Pack.h"
#include "Op/Pad.h"
+#include "Op/PadV2.h"
#include "Op/Pow.h"
#include "Op/PRelu.h"
#include "Op/Range.h"
repeated float max = 2;
repeated float scale = 3;
repeated int64 zero_point = 4;
+ optional int32 quantized_dimension = 5 [default = 0];
}
message Operand {
// None
}
+message PadV2Options {
+ // None
+}
+
message MirrorPadOptions {
optional MirrorPadMode mode = 1 [default = REFLECT];
}
// None
}
+message NonMaxSuppressionV4Options {
+ // None
+}
+
message NotEqualOptions {
// None
}
optional LogSoftmaxOptions log_softmax_options = 168;
// DequantizeOptions 169
optional NegOptions neg_options = 170;
- // PadV2Options 171
+ optional PadV2Options padv2_options = 171;
optional LessEqualOptions lessequal_options = 172;
optional SliceOptions slice_options = 173;
optional TransposeConvOptions transpose_conv_options = 174;
optional MatrixSetDiagOptions matrix_set_diag_options = 195;
// HardSwishOptions 196
optional DepthToSpaceOptions depth_to_space_options = 197;
- // NonMaxSuppressionV4Options 198
+ optional NonMaxSuppressionV4Options non_max_suppression_v4_options = 198;
// NonMaxSuppressionV5Options 199
optional ScatterNdOptions scatter_nd_options = 200;
optional NotEqualOptions notequal_options = 201;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonMaxSuppressionV4.h"
+
+#include "Convert.h"
+#include "FillerHelper.h"
+
+namespace tflchef
+{
+
+void TFliteOpNonMaxSuppressionV4::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const auto &inputs = *op->inputs();
+
+ const tflite::Tensor *max_output_size_tensor = import->tensors()->Get(inputs[2]);
+ assert(max_output_size_tensor->type() == tflite::TensorType::TensorType_INT32);
+
+ const tflite::Tensor *iou_threshold_tensor = import->tensors()->Get(inputs[3]);
+ assert(iou_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+ const tflite::Tensor *score_threshold_tensor = import->tensors()->Get(inputs[4]);
+ assert(score_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+ for (int32_t index = 2; index < 5; ++index)
+ {
+ fill_tensor_to_import(index, import);
+ }
+}
+
+tflchef::Operation *TFliteOpNonMaxSuppressionV4::build(const tflite::Operator *op,
+ TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("NonMaxSuppressionV4");
+
+ return operation;
+}
+
+} // namespace tflchef
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_NON_MAX_SUPPRESSION_V4_H__
+#define __TFLITE_OP_NON_MAX_SUPPRESSION_V4_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for NON_MAX_SUPPRESSION_V4
+ */
+class TFliteOpNonMaxSuppressionV4 : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_NON_MAX_SUPPRESSION_V4_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PadV2.h"
+
+#include "FillerHelper.h"
+
+namespace tflchef
+{
+
+void TFliteOpPadV2::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Filler for paddings and constant_values
+ fill_tensor_to_import(1, import);
+ fill_tensor_to_import(2, import);
+}
+
+tflchef::Operation *TFliteOpPadV2::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("PadV2");
+
+ return operation;
+}
+
+} // namespace tflchef
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_PADV2_H__
+#define __TFLITE_OP_PADV2_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for PADV2
+ */
+class TFliteOpPadV2 : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_PADV2_H__
auto vec = extract_buffer<int32_t>(buffer);
import->set_tensor_filler(inputs[0], vec);
}
+
+ // filter
+ const tflite::Tensor *filter_tensor = import->tensors()->Get(inputs[1]);
+ import->set_tensor_filler(inputs[1]);
}
tflchef::Operation *TFliteOpTransposeConv::build(const tflite::Operator *op, TFliteImport *import,
for (uint32_t idx = 0; idx < quant->zero_point()->size(); ++idx)
chef_quant->add_zero_point(quant->zero_point()->Get(idx));
}
+ tflchef::TensorQuantization *chef_quant = operand->mutable_quant();
+ chef_quant->set_quantized_dimension(quant->quantized_dimension());
}
}
#include "Op/MirrorPad.h"
#include "Op/Mul.h"
#include "Op/Neg.h"
+#include "Op/NonMaxSuppressionV4.h"
#include "Op/NotEqual.h"
#include "Op/OneHot.h"
#include "Op/Pack.h"
#include "Op/Pad.h"
+#include "Op/PadV2.h"
#include "Op/Pow.h"
#include "Op/PRelu.h"
#include "Op/Range.h"
REG_TFL_OP(MIRROR_PAD, TFliteOpMirrorPad);
REG_TFL_OP(MUL, TFliteOpMul);
REG_TFL_OP(NEG, TFliteOpNeg);
+ REG_TFL_OP(NON_MAX_SUPPRESSION_V4, TFliteOpNonMaxSuppressionV4);
REG_TFL_OP(NOT_EQUAL, TFliteOpNotEqual);
REG_TFL_OP(ONE_HOT, TFliteOpOneHot);
REG_TFL_OP(PACK, TFliteOpPack);
REG_TFL_OP(PAD, TFliteOpPad);
+ REG_TFL_OP(PADV2, TFliteOpPadV2);
REG_TFL_OP(POW, TFliteOpPow);
REG_TFL_OP(PRELU, TFliteOpPRelu);
REG_TFL_OP(RANGE, TFliteOpRange);
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
int32_t model_version = 1;
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
std::string tflite_path = arser.get<std::string>("tflite");
{
std::cout << err.what() << '\n';
std::cout << arser;
- return 0;
+ return 255;
}
std::string tflite_path = arser.get<std::string>("tflite");
_op_map[tflite::BuiltinOperator_MAX_POOL_2D] = make_unique<Pool2DPrinter>();
_op_map[tflite::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
_op_map[tflite::BuiltinOperator_MUL] = make_unique<MulPrinter>();
+ // There is no Option for NON_MAX_SUPPRESSION_V4
_op_map[tflite::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
_op_map[tflite::BuiltinOperator_PACK] = make_unique<PackPrinter>();
// There is no Option for PAD
target_link_libraries(tflite2circle safemain)
target_link_libraries(tflite2circle mio_tflite)
target_link_libraries(tflite2circle mio_circle)
+target_link_libraries(tflite2circle vconone)
install(TARGETS tflite2circle DESTINATION bin)
#include "CircleModel.h"
#include "TFLModel.h"
+#include <vconone/vconone.h>
+
+void print_version(void)
+{
+ std::cout << "tflite2circle version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
int entry(int argc, char **argv)
{
arser::Arser arser{"tflite2circle is a Tensorflow lite to circle model converter"};
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
arser.add_argument("tflite")
.nargs(1)
.type(arser::DataType::STR)
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
std::string tfl_path = arser.get<std::string>("tflite");
require("mio-tflite")
require("mio-circle")
require("safemain")
+require("vconone")
#include "BuildBuiltinOptions/MirrorPadOptions.h"
#include "BuildBuiltinOptions/MulOptions.h"
#include "BuildBuiltinOptions/NegOptions.h"
+#include "BuildBuiltinOptions/NonMaxSuppressionV4Options.h"
#include "BuildBuiltinOptions/NotEqualOptions.h"
#include "BuildBuiltinOptions/OneHotOptions.h"
#include "BuildBuiltinOptions/PackOptions.h"
#include "BuildBuiltinOptions/PadOptions.h"
+#include "BuildBuiltinOptions/PadV2Options.h"
#include "BuildBuiltinOptions/RangeOptions.h"
#include "BuildBuiltinOptions/Pool2DOptions.h"
#include "BuildBuiltinOptions/PowOptions.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonMaxSuppressionV4Options.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::NonMaxSuppressionV4Options>
+build_circle_NonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *)
+{
+ circle::NonMaxSuppressionV4OptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_NON_MAX_SUPPRESSION_V4_OPTIONS_H__
+#define __BBO_NON_MAX_SUPPRESSION_V4_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::NonMaxSuppressionV4Options>
+build_circle_NonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_NON_MAX_SUPPRESSION_V4_OPTIONS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PadV2Options.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::PadV2Options>
+build_circle_PadV2Options(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::PadV2OptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_PADV2_OPTIONS_H__
+#define __BBO_PADV2_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::PadV2Options>
+build_circle_PadV2Options(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_PADV2_OPTIONS_H__
//TFL_BUILTIN_OPTIONS(EmbeddingLookupSparseOptions)
TFL_BUILTIN_OPTIONS(MulOptions)
TFL_BUILTIN_OPTIONS(PadOptions)
+TFL_BUILTIN_OPTIONS(PadV2Options)
TFL_BUILTIN_OPTIONS(GatherOptions)
TFL_BUILTIN_OPTIONS(BatchToSpaceNDOptions)
TFL_BUILTIN_OPTIONS(SpaceToBatchNDOptions)
TFL_BUILTIN_OPTIONS(IfOptions)
TFL_BUILTIN_OPTIONS(WhileOptions)
TFL_BUILTIN_OPTIONS(DepthToSpaceOptions)
-//TFL_BUILTIN_OPTIONS(NonMaxSuppressionV4Options)
+TFL_BUILTIN_OPTIONS(NonMaxSuppressionV4Options)
//TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options)
TFL_BUILTIN_OPTIONS(RankOptions)
TFL_BUILTIN_OPTIONS(ScatterNdOptions)
--- /dev/null
+if (NOT VCONONE_VERSION)
+ set(VCONONE_VERSION 0x0000000000080001)
+ # NOTE order is [build patch minor major]
+ # if VCONONE_VERSION is set with -D option, it will be cached
+ # you may have to remove cache file if you remove -D option
+endif()
+
+configure_file(version_cfg.h.in version_cfg.h @ONLY)
+
+set(DRIVER "driver/driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(vconone STATIC ${SOURCES})
+target_include_directories(vconone PUBLIC include)
+target_include_directories(vconone PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
+
+add_executable(one-version ${DRIVER})
+target_link_libraries(one-version vconone)
+install(TARGETS one-version DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(vconone_test ${TESTS})
+target_link_libraries(vconone_test vconone)
--- /dev/null
+# vconone
+
+_vconone_ provides the version number and version strings for the one-* commands
+and command line tools.
+
+# Revise version number
+
+To revise the version number, update `VCONONE_VERSION` in `CMakeLists.txt`
+or give `-DVCONONE_VERSION=0x0000000100080001` at cmake configure step.
+
+The value packs four 16-bit integers; read from the most significant end they
+are `build`, `patch`, `minor` and `major`. `build` is not used for now.
+
+Version `0x0000000100080001` is therefore interpreted as `1.8.1`.
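+
+For illustration only, here is a minimal Python sketch (not part of the build;
+the `decode` helper is made up for this example) of how the packed value maps
+to the `major.minor.patch` string that `vconone::get_string()` reports:
+
+```python
+# Decode a VCONONE_VERSION value into "major.minor.patch".
+# Read from the most significant end, the 16-bit fields are
+# [build][patch][minor][major]; `build` is currently unused.
+def decode(version):
+    major = version & 0xFFFF
+    minor = (version >> 16) & 0xFFFF
+    patch = (version >> 32) & 0xFFFF
+    return "{}.{}.{}".format(major, minor, patch)
+
+assert decode(0x0000000100080001) == "1.8.1"
+```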
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vconone/vconone.h>
+
+#include <string>
+#include <iostream>
+
+int main(int argc, char *argv[])
+{
+ auto str = vconone::get_string();
+ if (argc >= 2)
+ {
+ for (int c = 1; c < argc; ++c)
+ std::cout << argv[c] << " ";
+ std::cout << "version " << str << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+ }
+ else
+ std::cout << str;
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __VCON_ONE_H__
+#define __VCON_ONE_H__
+
+#include <cstdint>
+#include <string>
+
+namespace vconone
+{
+
+struct four
+{
+ uint16_t major;
+ uint16_t minor;
+ uint16_t patch;
+ uint16_t build; // build is not used for now
+};
+
+union version {
+ uint64_t v;
+ four f;
+};
+
+/**
+ * @brief get_number will return version union structure
+ */
+version get_number(void);
+
+/**
+ * @brief get_string will return string of major.minor.patch (without build)
+ */
+std::string get_string(void);
+
+/**
+ * @brief get_string4 will return string of major.minor.patch.build
+ */
+std::string get_string4(void);
+
+/**
+ * @brief get_copyright will return copyright string
+ */
+std::string get_copyright(void);
+
+} // namespace vconone
+
+#endif // __VCON_ONE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "vconone/vconone.h"
+
+#include "version_cfg.h"
+
+#include <sstream>
+
+namespace vconone
+{
+
+version get_number(void)
+{
+ version v;
+ v.v = VCONONE_VERSION;
+ return v;
+}
+
+std::string get_string4(void)
+{
+ std::ostringstream ss;
+
+ auto v = get_number();
+ ss << unsigned(v.f.major) << "." << unsigned(v.f.minor) << "." << unsigned(v.f.patch) << "."
+ << unsigned(v.f.build);
+
+ return ss.str();
+}
+
+std::string get_string(void)
+{
+ std::ostringstream ss;
+
+ auto v = get_number();
+ ss << unsigned(v.f.major) << "." << unsigned(v.f.minor) << "." << unsigned(v.f.patch);
+
+ return ss.str();
+}
+
+std::string get_copyright(void)
+{
+ std::string str;
+ str = "Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
+ str += "Licensed under the Apache License, Version 2.0\r\n";
+ str += "https://github.com/Samsung/ONE";
+ return str;
+}
+
+} // namespace vconone
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vconone/vconone.h>
+
+#include <gtest/gtest.h>
+
+TEST(vconone, version_number)
+{
+ auto v = vconone::get_number();
+
+ ASSERT_NE(0x0000000000000000ULL, v.v);
+}
+
+TEST(vconone, version_string)
+{
+ auto str = vconone::get_string();
+
+ ASSERT_NE("..", str);
+ ASSERT_NE("", str);
+}
+
+TEST(vconone, version_string4)
+{
+ auto str = vconone::get_string4();
+
+ ASSERT_NE("...", str);
+ ASSERT_NE("", str);
+}
+
+TEST(vconone, copyright)
+{
+ auto str = vconone::get_copyright();
+
+ ASSERT_NE("", str);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __VCON_ONE_VERSION_CFG_H__
+#define __VCON_ONE_VERSION_CFG_H__
+
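+// @VCONONE_VERSION@ is substituted with the packed 64-bit version value at
+// CMake configure time (VCONONE_VERSION in CMakeLists.txt, or
+// -DVCONONE_VERSION=... on the command line); the ULL suffix keeps the
+// substituted literal an unsigned 64-bit constant.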
+#define VCONONE_VERSION @VCONONE_VERSION@ULL
+
+#endif // __VCON_ONE_VERSION_CFG_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLArgOperationKernel.h
- * @brief This file defines CLArgOperationKernel
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__
-#define __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to define interface for the argop kernel.
- */
-class CLArgOperationKernel : public ICLKernel
-{
-public:
- /**
- * @brief Default constructor.
- */
- CLArgOperationKernel();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied
- */
- CLArgOperationKernel(const CLArgOperationKernel &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied
- * @return Reference of this instance
- */
- CLArgOperationKernel &operator=(const CLArgOperationKernel &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved
- */
- CLArgOperationKernel(CLArgOperationKernel &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved
- * @return Reference of this instance
- */
- CLArgOperationKernel &operator=(CLArgOperationKernel &&) = default;
- /**
- * @brief Initialise the kernel's input, output and border mode.
- * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[out] output The output tensor, Data types supported: S32.
- * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
- * @param[in] op Arg operation to perform.
- * return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis, ArgOperation op);
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLArgOperationKernel
- * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32.
- * @param[in] output The output tensor info, Data types supported: S32.
- * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
- * @param[in] op Arg operation to perform.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
- ArgOperation op);
-
- /*
- * @brief Run CLArgOperationKernel op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- uint32_t _axis;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLARGOPERATIONKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLCastKernel.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file defines CLCastKernel class
- */
-
-#ifndef __ARM_COMPUTE_CLCASTKERNEL_H__
-#define __ARM_COMPUTE_CLCASTKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to define OpenCL kernel for cast operation
- */
-class CLCastKernel : public ICLKernel
-{
-public:
- /**
- * @brief Construct CLCastKernel object
- */
- CLCastKernel();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLCastKernel(const CLCastKernel &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLCastKernel &operator=(const CLCastKernel &) = delete;
-
- /**
- * @brief Construct CLCastKernel object using default move constructor
- * @param[in] CLCastKernel object to move
- */
- CLCastKernel(CLCastKernel &&) = default;
-
- /**
- * @brief Allow instances of this class to be moved
- * @param[in] CLCastKernel object to move
- */
- CLCastKernel &operator=(CLCastKernel &&) = default;
-
- /**
- * @brief Destruct this CLCastKernel object
- */
- ~CLCastKernel() = default;
-
- /**
- * @brief Initialise the kernel's input and output.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] input_subtype Sub data type of input.
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, SubDataType input_subtype);
-
- /**
- * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
- * queue.
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have
- * been executed by the time this method returns.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of
- * the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLCASTKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
-#define __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform depthTospace operation */
-class CLDepthToSpaceKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthToSpaceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthToSpaceKernel(const CLDepthToSpaceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthToSpaceKernel &operator=(const CLDepthToSpaceKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthToSpaceKernel(CLDepthToSpaceKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthToSpaceKernel &operator=(CLDepthToSpaceKernel &&) = default;
- /** Default destructor */
- ~CLDepthToSpaceKernel() = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__
-#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices
- *
- * @note This kernel should be used ONLY for Midgard architectures
- *
- * This kernel performs the following computation:
- *
- * -# Convert a values from int8 to int32
- * -# Convert b values from int8 to int32
- * -# Compute the int32 matrix product of the resulting a * b and store the result as int32
- *
- */
-class CLGEMMLowpMatrixMultiplyKernelEx : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMLowpMatrixMultiplyKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyKernelEx(const CLGEMMLowpMatrixMultiplyKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyKernelEx &operator=(const CLGEMMLowpMatrixMultiplyKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyKernelEx(CLGEMMLowpMatrixMultiplyKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyKernelEx &operator=(CLGEMMLowpMatrixMultiplyKernelEx &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note This kernel should be used ONLY for Midgard architectures
- *
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8
- * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p
- * input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type
- * supported: S32
- * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of
- * the input matrices
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output,
- const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLGEMMLowpMatrixMultiplyKernelEx
- *
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8
- * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p
- * input0
- * @param[in] output Output tensor to store the result of matrix multiplication. Data type
- * supported: S32
- * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of
- * the input matrices
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1,
- const ITensorInfo *output,
- const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__*/
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLPRELU_KERNEL_H__
-#define __ARM_COMPUTE_CLPRELU_KERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to calculate PReLU*/
-class CLPReLUKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPReLUKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLPReLUKernel(const CLPReLUKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLPReLUKernel &operator=(const CLPReLUKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPReLUKernel(CLPReLUKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPReLUKernel &operator=(CLPReLUKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor1.
- * @param[in] alpha Source tensor2.
- * @param[out] output Output tensor.
- */
- void configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_alpha;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLPRELU_KERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
-#define __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform spaceTodepth operation */
-class CLSpaceToDepthKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLSpaceToDepthKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToDepthKernel(const CLSpaceToDepthKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToDepthKernel &operator=(const CLSpaceToDepthKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSpaceToDepthKernel(CLSpaceToDepthKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSpaceToDepthKernel &operator=(CLSpaceToDepthKernel &&) = default;
- /** Default destructor */
- ~CLSpaceToDepthKernel() = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
-#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the Upsampling layer kernel for transpose convolution on OpenCL.
- */
-class CLTransposeConvLayerUpsampleKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLTransposeConvLayerUpsampleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayerUpsampleKernel(const CLTransposeConvLayerUpsampleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayerUpsampleKernel &
- operator=(const CLTransposeConvLayerUpsampleKernel &) = delete;
- /** Default Move Constructor. */
- CLTransposeConvLayerUpsampleKernel(CLTransposeConvLayerUpsampleKernel &&) = default;
- /** Default move assignment operator */
- CLTransposeConvLayerUpsampleKernel &operator=(CLTransposeConvLayerUpsampleKernel &&) = default;
- /** Default destructor */
- ~CLTransposeConvLayerUpsampleKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input. All but
- * the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only
- * performed within the XY-plane.
- * @param[in] inner_border Top and right inner border sizes. These rows and columns will be
- * filled with zero.
- * @param[in] info Contains padding and stride information described in @ref
- * PadStrideInfo.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
- const PadStrideInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayerUpsample
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
- * @param[in] output Destination tensor info. Data types supported: same as @p input. All
- * but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is
- * only performed within the XY-plane.
- * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled
- * with zero.
- * @param[in] info Contains padding and stride information described in @ref
- * PadStrideInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const BorderSize &inner_border, const PadStrideInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- BorderSize _inner_border;
- PadStrideInfo _info;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
-#define __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
-
-#include "arm_compute/core/CPP/ICPPKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** CPP kernel to perform tensor upsample.
- *
- */
-class CPPUpsampleKernelEx : public ICPPKernel
-{
-public:
- const char *name() const override { return "CPPUpsampleKernelEx"; }
- /** Default constructor */
- CPPUpsampleKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPUpsampleKernelEx(const CPPUpsampleKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPUpsampleKernelEx &operator=(const CPPUpsampleKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- CPPUpsampleKernelEx(CPPUpsampleKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- CPPUpsampleKernelEx &operator=(CPPUpsampleKernelEx &&) = default;
- /** Default destructor */
- ~CPPUpsampleKernelEx() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] info Padding info.
- */
- void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- PadStrideInfo _info;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NECASTKERNEL_H__
-#define __ARM_COMPUTE_NECASTKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the cast layer kernel. */
-class NECastKernel : public INEKernel
-{
-public:
- const char *name() const override { return "NECastKernel"; }
- /** Default constructor */
- NECastKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECastKernel(const NECastKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECastKernel &operator=(const NECastKernel &) = delete;
- /** Default Move Constructor. */
- NECastKernel(NECastKernel &&) = default;
- /** Default move assignment operator */
- NECastKernel &operator=(NECastKernel &&) = default;
- /** Default destructor */
- ~NECastKernel() = default;
- /** Set input, output tensors.
- *
- * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[out] output Destination tensor with the same dimensions of input. Data type supported:
- * U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] input_subtype Sub data type of input.
- */
- void configure(const ITensor *input, ITensor *output, SubDataType input_subtype);
- /** Static function to check if given info will lead to a valid configuration of @ref NECastKernel
- *
- * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] input_subtype Sub data type of input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- SubDataType input_subtype);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- SubDataType _input_subtype;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NECASTKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__
-#define __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the depth to space kernel */
-class NEDepthToSpaceLayerKernelEx : public INEKernel
-{
-public:
- const char *name() const override { return "NEDepthToSpaceLayerKernelEx"; }
- /** Default constructor */
- NEDepthToSpaceLayerKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthToSpaceLayerKernelEx(const NEDepthToSpaceLayerKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthToSpaceLayerKernelEx &operator=(const NEDepthToSpaceLayerKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- NEDepthToSpaceLayerKernelEx(NEDepthToSpaceLayerKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- NEDepthToSpaceLayerKernelEx &operator=(NEDepthToSpaceLayerKernelEx &&) = default;
- /** Default destructor */
- ~NEDepthToSpaceLayerKernelEx() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape x value.
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEDepthToSpaceLayerKernelEx.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__
-#define __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for an element-wise unary operation kernel
- *
- * Element-wise operation is computed by:
- * @f[ output(x) = OP(input(x))@f]
- *
- */
-class NEElementwiseUnaryKernelEx : public INEKernel
-{
-public:
- const char *name() const override { return "NEElementwiseUnaryKernelEx"; }
- /** Default constructor */
- NEElementwiseUnaryKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseUnaryKernelEx(const NEElementwiseUnaryKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseUnaryKernelEx &operator=(const NEElementwiseUnaryKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- NEElementwiseUnaryKernelEx(NEElementwiseUnaryKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- NEElementwiseUnaryKernelEx &operator=(NEElementwiseUnaryKernelEx &&) = default;
- /** Default destructor */
- ~NEElementwiseUnaryKernelEx() = default;
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEElementwiseUnaryKernelEx
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input First tensor input. Data types supported: F16/F32/S32.
- * @param[in] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(ElementWiseUnaryEx op, const ITensor *input, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEElementwiseUnaryKernelEx
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a Status
- */
- static Status validate(ElementWiseUnaryEx op, const ITensorInfo *input,
- const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- /** Common signature for all the specialised arithmetic functions
- *
- * @param[in] input An input tensor. Data types supported: F16/F32/S32.
- * @param[out] output The output tensor. Data types supported: Same as @p input.
- * @param[in] window Region on which to execute the kernel.
- */
- using ElementwiseUnaryFunction = void(const ITensor *input, ITensor *output,
- const Window &window);
-
-protected:
- // Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input, const ITensorInfo &output);
-
- /** Function to use for the particular tensor types passed to configure() */
- std::function<void(const ITensor *input, ITensor *output, const Window &window)> _function;
-
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEPRELUKERNEL_H__
-#define __ARM_COMPUTE_NEPRELUKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform Parametric Rectified Linear Unit
- *
- * Result is computed by:
- * @f[ output(x) = alpha * x for x < 0, output(x) = x for x >= 0 @f]
- */
-class NEPReLUKernel : public INEKernel
-{
-public:
- const char *name() const override { return "NEPReLUKernel"; }
- /** Default constructor */
- NEPReLUKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPReLUKernel(const NEPReLUKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPReLUKernel &operator=(const NEPReLUKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPReLUKernel(NEPReLUKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPReLUKernel &operator=(NEPReLUKernel &&) = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data type supported: QASYMM8/F32
- * @param[in] alpha Alpha tensor. Data types supported: Same as @p input
- * @param[out] output Output tensor. Data types supported: Same as @p input
- */
- void configure(const ITensor *input, const ITensor *alpha, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEPReLUKernel.h
- *
- * @param[in] input Input tensor input info. Data types supported: QASYMM8/F32.
- * @param[in] alpha Alpha tensor input info. Data types supported: Same as @p input.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *alpha,
- const ITensorInfo *output);
- static Status validate_arguments(const ITensorInfo &input, const ITensorInfo &alpha,
- const ITensorInfo &output);
-
-private:
- const ITensor *_input; /**< Source tensor */
- const ITensor *_alpha; /**< Alpha tensor */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEPRELUKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__
-#define __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the space to depth kernel */
-class NESpaceToDepthLayerKernelEx : public INEKernel
-{
-public:
- const char *name() const override { return "NESpaceToDepthLayerKernelEx"; }
- /** Default constructor */
- NESpaceToDepthLayerKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToDepthLayerKernelEx(const NESpaceToDepthLayerKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToDepthLayerKernelEx &operator=(const NESpaceToDepthLayerKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- NESpaceToDepthLayerKernelEx(NESpaceToDepthLayerKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- NESpaceToDepthLayerKernelEx &operator=(NESpaceToDepthLayerKernelEx &&) = default;
- /** Default destructor */
- ~NESpaceToDepthLayerKernelEx() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NESpaceToDepthLayerKernelEx
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__ */
#ifndef __ARM_COMPUTE_CLFUNCTIONSEX_H__
#define __ARM_COMPUTE_CLFUNCTIONSEX_H__
-#include <arm_compute/runtime/CL/functions/CLArgOperation.h>
-#include <arm_compute/runtime/CL/functions/CLBatchToSpaceND.h>
#include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h>
-#include <arm_compute/runtime/CL/functions/CLCast.h>
-#include <arm_compute/runtime/CL/functions/CLDepthToSpace.h>
#include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/CL/functions/CLGatherEx.h>
#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h>
#include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h>
-#include <arm_compute/runtime/CL/functions/CLLogicalNot.h>
#include <arm_compute/runtime/CL/functions/CLNeg.h>
-#include <arm_compute/runtime/CL/functions/CLPixelWiseDivision.h>
-#include <arm_compute/runtime/CL/functions/CLPReLU.h>
#include <arm_compute/runtime/CL/functions/CLReduceOperation.h>
-#include <arm_compute/runtime/CL/functions/CLRNNLayerEx.h>
-#include <arm_compute/runtime/CL/functions/CLSpaceToDepth.h>
-#include <arm_compute/runtime/CL/functions/CLSplit.h>
-#include <arm_compute/runtime/CL/functions/CLStridedSliceEx.h>
#include <arm_compute/runtime/CL/functions/CLTopKV2.h>
#include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h>
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLArgOperation.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLArgOperation class
- */
-
-#ifndef __ARM_COMPUTE_CLARGOPERATION_H__
-#define __ARM_COMPUTE_CLARGOPERATION_H__
-
-#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to execute CLArgOperation operation
- */
-class CLArgOperation : public IFunction
-{
-public:
- /**
- * @brief Construct a new CLArgOperation object
- */
- CLArgOperation();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLArgOperation(const CLArgOperation &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLArgOperation &operator=(const CLArgOperation &) = delete;
-
- /**
- * @brief Construct a new CLArgOperation object by using copy constructor
- * @param[in] CLArgOperation object to move
- */
- CLArgOperation(CLArgOperation &&) = default;
-
- /**
- * @brief Assign a CLArgOperation object.
- * @param[in] CLArgOperation object to assign. This object will be moved.
- */
- CLArgOperation &operator=(CLArgOperation &&) = default;
-
- /**
- * @brief Initialise the kernel's inputs and outputs.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[out] output The result of arg operation. Data types supported: S32.
- * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
- * @param[in] op Arg operation to perform.
- * @return N/A
- */
- void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, ArgOperation op);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
- * @param[out] output The result of arg operation. Data types supported: S32.
- * @param[in] op Arg operation to perform.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const std::vector<uint32_t> &axis,
- const ITensorInfo *output, ArgOperation op);
- /**
- * @brief Run the OpenCL kernel for this operation
- * @return N/A
- */
- void run() override;
-
-private:
- ICLTensor *_input{nullptr};
- ICLTensor *_output{nullptr};
- std::vector<uint32_t> _axis{};
- ArgOperation _arg_op{ArgOperation::MAX};
-
- std::unique_ptr<CLTensor[]> _interm_tensors{nullptr};
- std::unique_ptr<CLArgOperationKernel[]> _argop_kernels{nullptr};
- size_t _num_of_kernels{0};
-};
-}
-#endif /*__ARM_COMPUTE_CLARGOPERATION_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
-#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLBatchToSpaceNDKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLBatchToSpaceND : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] block_size A pointer to an array of integer values specifying block sizes
- * for spatial dimension.
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLCast.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLCast class
- */
-
-#ifndef __ARM_COMPUTE_CLCAST_H__
-#define __ARM_COMPUTE_CLCAST_H__
-
-#include "arm_compute/core/TypesEx.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLCastKernel.
- * This converts the input tensor to the tensor of the output tensor's type.
- */
-class CLCast : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's input and output
- * @param[in, out] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] input_subtype Sub data type of input.
- */
- void configure(ICLTensor *input, ICLTensor *output, SubDataType input_subtype);
-};
-}
-#endif /* __ARM_COMPUTE_CLCAST_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLDEPTHTOSPACE_H__
-#define __ARM_COMPUTE_CLDEPTHTOSPACE_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLDepthToSpaceKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLDepthToSpace : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[block_size] block size integer only
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
-};
-} // namesace arm_compute
-
-#endif /* __ARM_COMPUTE_CLDEPTHTOSPACE_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
+
+#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+#include "arm_compute/runtime/CL/functions/CLReverse.h"
+#include "arm_compute/runtime/CL/functions/CLTranspose.h"
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Function to run the deconvolution layer.
+ *
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
+ * depending on the stride and pad info and then perform a 1x1
+ * convolution pass. Input stride defines how many zeroes we should put between each element of the
+ * input and pad is the amount of padding.
+ *
+ * The relation between input to output is as follows:
+ * \f[
+ * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
+ * \f]
+ * \f[
+ * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
+ * \f]
+ *
+ * where:
+ * width_input is the size of the first input dimension.
+ * height_input is the size of the second input dimension.
+ * width_output is the size of the first output dimension.
+ * height_output is the size of the second output dimension.
+ * kernel_x and kernel_y are the convolution sizes in x and y.
+ * stride_x and stride_y is the input stride of the first and second dimension.
+ *
+ * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
+ * Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using @ref CLReverse.
+ *
+ * This function calls the following OpenCL kernels/functions:
+ *
+ * -# @ref CLDeconvolutionLayerUpsample
+ * -# @ref CLConvolutionLayer
+ *
+ * And the following CPP kernels:
+ * -# @ref CLReverse
+ *
+ */
+class CLDirectTransposeConvLayer : public IFunction
+{
+public:
+ /** Constructor */
+ CLDirectTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDirectTransposeConvLayer(const CLDirectTransposeConvLayer &) = delete;
+ /** Default move constructor */
+ CLDirectTransposeConvLayer(CLDirectTransposeConvLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDirectTransposeConvLayer &operator=(const CLDirectTransposeConvLayer &) = delete;
+ /** Default move assignment operator */
+ CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default;
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for
+ * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[out] output Output tensor. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this
+ * is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
+ */
+ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
+ * an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for
+ * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref
+ * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
+ * CLWeightsReshapeKernel.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLDirectTransposeConvLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for input
+ * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is
+ * described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ MemoryGroup _memory_group;
+ CLDeconvolutionLayerUpsample _scale_f;
+ CLConvolutionLayer _conv_f;
+ CLReverse _flip_weights;
+
+ CLTensor _scaled_output;
+ ICLTensor *_original_weights;
+ CLTensor _weights_flipped;
+ CLTensor _flip_axis;
+
+ bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ */
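As a quick sanity check of the output-size relation quoted in the header comment above, here is a minimal standalone sketch. It is independent of this patch and of arm_compute; the helper name transpose_conv_output_dim is hypothetical and only restates the documented formula.

#include <cstdint>
#include <iostream>

// Output extent of a transpose convolution along one axis, per the relation
// quoted in the CLDirectTransposeConvLayer comment:
//   out = (in - 1) * stride - 2 * padding + kernel
static int32_t transpose_conv_output_dim(int32_t in, int32_t stride, int32_t padding,
                                         int32_t kernel)
{
  return (in - 1) * stride - 2 * padding + kernel;
}

int main()
{
  // Example: 4x4 input, 3x3 kernel, stride 2, no padding -> 9x9 output.
  std::cout << transpose_conv_output_dim(4, 2, 0, 3) << '\n'; // prints 9
  return 0;
}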
#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
namespace arm_compute
{
CLFullyConnectedHybridLayerReshapeWeights _reshape_weights_kernel;
CLScaleFactorSymm8Kernel _scale_factor_kernel;
CLQuantizationSymmetricKernel _quant_input_kernel;
- CLGEMMLowpMatrixMultiplyCoreEx _mm_gemmlowp;
+ CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
CLMultiplyScaleFactorKernel _multiply_scale_kernel;
CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; // TODO(COMPMID-1889): Use CLGEMM to
// add bias in
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-namespace arm_compute
-{
-class IMemoryManager;
-class ICLTensor;
-
-/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. This function calls the
- * following OpenCL kernels:
- *
- * -# @ref CLGEMMLowpMatrixMultiplyKernel (if the parameter "reshape_b_only_on_first_run" of
- * GEMMInfo is FALSE)
- * -# @ref CLGEMMLowpMatrixAReductionKernel (if the offset of matrix B is not 0)
- * -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0)
- *
-*/
-class CLGEMMLowpMatrixMultiplyCoreEx : public IFunction
-{
-public:
- /** Constructor */
- CLGEMMLowpMatrixMultiplyCoreEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyCoreEx(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move constructor */
- CLGEMMLowpMatrixMultiplyCoreEx(CLGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyCoreEx &operator=(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move assignment operator */
- CLGEMMLowpMatrixMultiplyCoreEx &operator=(CLGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Initialise the kernel's inputs, output
- *
- * @note GEMMLowp: low precision GEMM kernel. [A * B + C]
- * This kernel performs the following computations:
- *
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
- * -# Compute the matrix product of the resulting a * b in int32.
- * -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
- *
- * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8.
- * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported:
- * S32
- * @param[out] output Output tensor. Data type supported: S32 or QASYMM8 if
- * gemm_info.gemmlowp_output_stage != NONE
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- */
- void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output,
- const GEMMInfo &gemm_info = GEMMInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLGEMMLowpMatrixMultiplyCoreEx
- *
- * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8.
- * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type
- * supported: S32
- * @param[in] output Output tensor info. Data type supported: S32 or QASYMM8 if
- * gemm_info.gemmlowp_output_stage != NONE
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
- const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
-
- // Kernels used
- CLGEMMLowpMatrixMultiplyKernelEx _mm_midgard_kernel;
- CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
-
- // Temporary tensors
- CLTensor _vector_sum_col;
- CLTensor _vector_sum_row;
-
- int32_t _a_offset;
- int32_t _b_offset;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLLOGICALNOT_H__
-#define __ARM_COMPUTE_CLLOGICALNOT_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-class CLLogicalNot : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source and destination.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8.
- * @param[out] output Output tensor. Data types supported: QASYMM8.
- */
- void configure(ICLTensor *input, ICLTensor *output);
-};
-
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLLOGICALNOT_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLPRELU_H__
-#define __ARM_COMPUTE_CLPRELU_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-class CLPReLU : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source and destination.
- *
- * @param[in] input. Data types supported:
- * QASYMM8/F16/F32.
- * @param[in] alpha. Data types supported:
- * QASYMM8/F16/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output);
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLPRELU_H__*/
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLPixelWiseDivision.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLPixelWiseDivision class
- */
-#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
-#define __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLPixelWiseDivisionKernel.
- */
-class CLPixelWiseDivision : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's inputs, output and convertion policy.
- * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output The output tensor, Data types supported: same as @p input1.
- * Note: U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or
- * 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest
- * even.
- * @return N/A
- */
- void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale = 1.f,
- ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
- RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLPixelWiseDivision
- * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info, Data types supported: same as @p input1.
- * Note: U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n
- * where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale = 1.f,
- ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
- RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
-};
-}
-#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISION_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLRNN_LAYER_EX_H__
-#define __ARM_COMPUTE_CLRNN_LAYER_EX_H__
-
-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
-#include "arm_compute/runtime/CL/functions/CLGEMM.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLRNNLayerEx */
-class CLRNNLayerEx : public IFunction
-{
-public:
- /** Default constructor */
- CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that
- * multiplies the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies
- * the current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same
- * as @p input
- * @param[out] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- */
- void configure(const ICLTensor *input, const ICLTensor *weights,
- const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state,
- ICLTensor *output, ActivationLayerInfo &info);
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies
- * the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the
- * current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p
- * input
- * @param[in] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
- const ITensorInfo *hidden_state, const ITensorInfo *output,
- const ActivationLayerInfo &info);
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
- CLGEMM _gemm_state_f;
- CLSaturatedArithmeticOperationKernel _add_kernel;
- CLActivationLayerKernel _activation_kernel;
- CLFullyConnectedLayer _fully_connected_kernel;
- CLCopyKernel _copy_kernel;
- CLTensor _fully_connected_out;
- CLTensor _gemm_output;
- CLTensor _add_output;
- bool _is_prepared;
-};
-}
-#endif /* __ARM_COMPUTE_CLRNN_LAYER_EX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLSPACETODEPTH_H__
-#define __ARM_COMPUTE_CLSPACETODEPTH_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLSpaceToDepthKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLSpaceToDepth : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[block_size] block size integer only
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLSPACETODEPTH_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLStridedSlice.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLStridedSlice and arm_compute::CLStridedSliceCPU class
- */
-
-#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
-#define __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLStridedSliceKernel
- */
-class CLStridedSliceEx : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's inputs and outputs
- * @param[in] input Tensor input. Data type supported:
- * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] beginData 'begin' vector of strided slice operation
- * @param[in] endData 'end' vector of strided slice operation
- * @param[in] stridesData 'strides' vector of strided slice operation
- * @param[in] beginMask If the ith bit is set, begin[i] is ignored
- * @param[in] endMask If the ith bit is set, end[i] is ignored
- * @param[in] shrinkAxisMask If the ith bit is set, the ith specification shrinks the
- * dimensionality by 1, taking on the value at index begin[i]
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
- ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
- int32_t shrinkAxisMask);
-};
-}
-#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ */
*/
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
#define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
-#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
-
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
-
-#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
-class ICLTensor;
-/** Function to run the transpose convolution layer.
- *
- * @note This layer was copied in order to fix a bug computing to wrong output dimensions.
- *
- * TransposeConv Layer is the backward pass of Convolution Layer. First we transform the input
- * depending on the stride and pad info and then perform a 1x1
- * convolution pass. Input stride defines how many zeroes we should put between each element of the
- * input, pad is the amount of padding and finally a is a user
- * specified value where a < stride - 1, that increases the padding top and right of the input
- * image.
- *
- * The relation between input to output is as follows:
- * \f[
- * width\_output = (width\_input - 1) \cdot stride\_x - \cdot padding\_x + kernel\_x
- * \f]
- * \f[
- * height\_output = (height\_input - 1) \cdot stride\_y - \cdot padding\_y + kernel\_y
- * \f]
- *
- * where:
- * width_input is the size of the first input dimension.
- * height_input is the size of the second input dimension.
- * width_output is the size of the first output dimension.
- * height_output is the size of the second output dimension.
- * kernel_x and kernel_y are the convolution sizes in x and y.
- * stride_x and stride_y is the input stride of the first and second dimension.
- *
- * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
- * Therefore, it will be necessary to use the weights in the
- * reverse order to perform an actual convolution. This is achieved by using the @ref
- * CPPFlipWeightsKernel.
- *
- * This function calls the following OpenCL kernels/functions:
- *
- * -# @ref CLTransposeConvLayerUpsample
- * -# @ref CLConvolutionLayer
+/** Basic function to compute the deconvolution layer. This function calls the following OpenCL
+ * kernels/functions:
*
+ * -# @ref CLGEMMDeconvolutionLayer
+ * -# @ref CLDirectTransposeConvLayer
*/
class CLTransposeConvLayer : public IFunction
{
public:
- /** Constructor */
+ /** Default constructor */
CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayer(const CLTransposeConvLayer &) = delete;
- /** Default move constructor */
- CLTransposeConvLayer(CLTransposeConvLayer &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayer &operator=(const CLTransposeConvLayer &) = delete;
- /** Default move assignment operator */
- CLTransposeConvLayer &operator=(CLTransposeConvLayer &&) = default;
+
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
- * and an optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
- * Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported:
- * Same as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions
- * as the @p input.
- * @param[in] info Contains padding and policies to be used in the
- * transpose convolution, this is decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been
- * reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same
+ * as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this
+ * is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
- const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo());
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
+ * an optional 4th dimension for batch of inputs. Data types supported:
+ * QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported:
+ * Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref
+ * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
+ * CLWeightsReshapeKernel.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayer
+ * CLTransposeConvLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as
+ * @p input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is
+ * described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
- * and an optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
- * Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported:
- * Same as @p input.
- * @param[in] output Output tensor info. The output has the same number of dimensions
- * as the @p input.
- * @param[in] info Contains padding and policies to be used in the
- * transpose convolution, this is decribed in @ref PadStrideInfo.
- * @param[in] innvalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
- unsigned int innvalid_right, unsigned int invalid_bottom,
+ const ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
+ static DeconvolutionMethod
+ get_deconvolution_method(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info);
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLTransposeConvLayerUpsample _scale_f;
- CLConvolutionLayer _conv_f;
- CPPFlipWeightsKernel _flip_weights;
- CLTensor _scaled_output;
- ICLTensor *_original_weights;
- CLTensor _weights_flipped;
- bool _is_prepared;
+ std::shared_ptr<IMemoryManager> _memory_manager;
+ std::unique_ptr<IFunction> _function;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */
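For reference, a minimal usage sketch of the reworked CLTransposeConvLayer follows. It relies only on the configure()/run() signatures declared above; the tensor shapes, stride values and CLScheduler setup are illustrative assumptions, not part of this patch.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h"

using namespace arm_compute;

void transpose_conv_sketch()
{
    CLScheduler::get().default_init(); // create a default CL context/queue

    // Illustrative shapes: 8x8x3 input, 3x3 kernels, 16 output feature maps.
    CLTensor input, weights, bias, output;
    input.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    // Stride 2, no padding: output spatial size is (8 - 1) * 2 + 3 = 17.
    output.allocator()->init(TensorInfo(TensorShape(17U, 17U, 16U), 1, DataType::F32));

    CLTransposeConvLayer deconv;
    deconv.configure(&input, &weights, &bias, &output, PadStrideInfo(2, 2, 0, 0),
                     0 /* invalid_right */, 0 /* invalid_bottom */);

    input.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill input/weights/bias here ...
    deconv.run();
    CLScheduler::get().sync();
}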
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
-#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLTransposeConvLayerUpsampleKernel */
-class CLTransposeConvLayerUpsample : public IFunction
-{
-public:
- /** Default constructor */
- CLTransposeConvLayerUpsample();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayerUpsample(const CLTransposeConvLayerUpsample &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayerUpsample &operator=(const CLTransposeConvLayerUpsample &) = delete;
- /** Allow instances of this class to be moved */
- CLTransposeConvLayerUpsample(CLTransposeConvLayerUpsample &&) = default;
- /** Allow instances of this class to be moved */
- CLTransposeConvLayerUpsample &operator=(CLTransposeConvLayerUpsample &&) = default;
- /** Default destructor */
- virtual ~CLTransposeConvLayerUpsample() = default;
-
- /** Initialize the function's source, destination, interpolation type and border_mode.
- *
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] inner_border The number of zeros added to right and top edges of the input.
- * @param[in] info Contains padding and policies to be used in the deconvolution.
- */
- void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
- const PadStrideInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayerUpsample
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input.
- * @param[in] inner_border The number of zeros added to right and top edges of the input.
- * @param[in] info Contains padding and policies to be used in the deconvolution.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const BorderSize &inner_border, const PadStrideInfo &info);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- CLTransposeConvLayerUpsampleKernel _upsample;
- ICLTensor *_output;
-};
-}
-#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
-#define __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
-
-#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
-
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref CPPUpsample */
-class CPPUpsampleEx : public ICPPSimpleFunction
-{
-public:
- /** Configure the upsample CPP kernel
- *
- * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] info Padding information
- */
- void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info);
-};
-}
-#endif /* __ARM_COMPUTE_CPPUPSAMPLE_EX_H__ */
#include <arm_compute/runtime/NEON/functions/NEActivationLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
-#include <arm_compute/runtime/NEON/functions/NECast.h>
-#include <arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEGatherEx.h>
#include <arm_compute/runtime/NEON/functions/NEHashtableLookup.h>
#include <arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NEPReLU.h>
-#include <arm_compute/runtime/NEON/functions/NEReduceMeanEx.h>
#include <arm_compute/runtime/NEON/functions/NEReduceSum.h>
-#include <arm_compute/runtime/NEON/functions/NERNNLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEReduceOperation.h>
-#include <arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h>
#endif // __ARM_COMPUTE_NEFUNCTIONSEX_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NECAST_H__
-#define __ARM_COMPUTE_NECAST_H__
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to run @ref NECastKernel that converts an input tensor to the other types */
-class NECast : public INESimpleFunctionNoBorder
-{
-public:
- /** Configure the kernel.
- *
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[out] output Destination tensor with the same dimensions of input. Data type supported:
- * U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] input_subtype Sub data type of input.
- */
- void configure(const ITensor *input, ITensor *output,
- SubDataType input_subtype = SubDataType::NONE);
- /** Static function to check if given info will lead to a valid configuration of @ref NECast
- *
- * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] output Output tensor info. Data type supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] input_subtype Sub data type of input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- SubDataType input_subtype = SubDataType::NONE);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NECAST_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__
-#define __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEDepthToSpaceLayerKernelEx. */
-class NEDepthToSpaceLayerEx : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEDepthToSpaceLayerEx.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape x value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__
-#define __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__
-
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to perform negative on an input tensor. */
-class NENegLayer : public INESimpleFunction
-{
-public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32/S32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NERsqrtLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__ */
#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h"
#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/runtime/Tensor.h"
MemoryGroup _memory_group;
NEFullyConnectedHybridLayerReshapeWeights _reshape_weights_function;
NEQuantizationSymmetricKernel _quant_input_kernel;
- NEGEMMLowpMatrixMultiplyCoreEx _mm_gemmlowp;
+ NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
NEMultiplyScaleFactorKernel _multiply_scale_kernel;
NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
Tensor _reshape_weights_output;
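The hybrid fully-connected path above now holds the stock NEGEMMLowpMatrixMultiplyCore instead of the removed Ex variant. A minimal sketch of the stock function's configure()/run() interface is shown below; the shapes and quantization parameters are assumptions for illustration, not values taken from this patch.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void gemmlowp_sketch()
{
    // Illustrative GEMM: A is 4x32, B is 32x16, output is 4x16 with S32 accumulators.
    Tensor a, b, output;
    a.allocator()->init(TensorInfo(TensorShape(32U, 4U), 1, DataType::QASYMM8_SIGNED,
                                   QuantizationInfo(1.f, 0)));
    b.allocator()->init(TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8_SIGNED,
                                   QuantizationInfo(1.f, 0)));
    output.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::S32));

    NEGEMMLowpMatrixMultiplyCore gemmlowp;
    gemmlowp.configure(&a, &b, nullptr, &output); // same call shape as the removed Ex variant

    a.allocator()->allocate();
    b.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill a and b here ...
    gemmlowp.run();
}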
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-// #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following
- * NEON kernels if the DOT product instruction is not available:
- *
- * -# @ref NEGEMMInterleave4x4Kernel
- * -# @ref NEGEMMTranspose1xWKernel
- * -# @ref NEGEMMLowpMatrixMultiplyKernel
- * -# @ref NEGEMMLowpOffsetContributionKernel
- * -# @ref NEActivationLayer
- *
- * otherwise if the DOT product instruction is available:
- *
- * -# @ref NEGEMMLowpOffsetContributionKernel
- *
-*/
-class NEGEMMLowpMatrixMultiplyCoreEx : public IFunction
-{
-public:
- /** Constructor */
- NEGEMMLowpMatrixMultiplyCoreEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpMatrixMultiplyCoreEx(const NEGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move constructor */
- NEGEMMLowpMatrixMultiplyCoreEx(NEGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpMatrixMultiplyCoreEx &operator=(const NEGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move assignment operator */
- NEGEMMLowpMatrixMultiplyCoreEx &operator=(NEGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Initialise the kernel's inputs, output
- *
- * @note GEMM_LOWP: low precision GEMM kernel
- * This kernel performs the following computations:
- *
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 add b_offset to each of them.
- * -# Compute the matrix product of the resulting a * b in int32.
- *
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is
- * QASYMM8/QASYMM8_SIGNED otherwise
- *
- * @param[in] a First input tensor (Matrix A). Data type supported:
- * QASYMM8/QASYMM8_SIGNED.
- * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported:
- * S32
- * @param[out] output Output tensor. Data type supported: Data type supported:
- * S32/QASYMM8/QASYMM8_SIGNED
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- */
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output,
- const GEMMInfo &gemm_info = GEMMInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEGEMMLowpMatrixMultiplyCoreEx
- *
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is
- * QASYMM8/QASYMM8_SIGNED otherwise
- *
- * @param[in] a First input tensor info (Matrix A). Data type supported:
- * QASYMM8/QASYMM8_SIGNED.
- * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type
- * supported: S32
- * @param[in] output Output tensor info. Data type supported: Data type supported:
- * S32/QASYMM8/QASYMM8_SIGNED
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
- const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
-
- // Inherited methods overridden
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
- NEGEMMAssemblyDispatch _asm_glue;
- std::unique_ptr<INEKernel> _mm_kernel;
- std::unique_ptr<INEKernel> _mtx_a_reshape_kernel;
- std::unique_ptr<INEKernel> _mtx_b_reshape_kernel;
- NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
- NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
- NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
-
- Tensor _vector_sum_col;
- Tensor _vector_sum_row;
- Tensor _tmp_a;
- Tensor _tmp_b;
- Tensor _mm_result_s32;
- Tensor _signed_a;
- Tensor _signed_output;
- const ITensor *_original_b;
- int32_t _a_offset;
- int32_t _b_offset;
-
- bool _run_vector_matrix_multiplication;
- bool _assembly_path;
- bool _fused_assembly_path;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _fuse_output_stage;
- bool _flip_signedness;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEPRELU_H__
-#define __ARM_COMPUTE_NEPRELU_H__
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEPReLUKernel */
-class NEPReLU : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input. Data types supported: QASYMM8/F32.
- * @param[in] alpha. Data types supported: Same as @p input.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(const ITensor *input, const ITensor *alpha, ITensor *output);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEPRELU_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NERNNLAYER_EX_H__
-#define __ARM_COMPUTE_NERNNLAYER_EX_H__
-
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to run @ref NERNNLayerEx */
-class NERNNLayerEx : public IFunction
-{
-public:
- /** Default constructor */
- NERNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERNNLayerEx(const NERNNLayerEx &) = delete;
- /** Default move constructor */
- NERNNLayerEx(NERNNLayerEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERNNLayerEx &operator=(const NERNNLayerEx &) = delete;
- /** Default move assignment operator */
- NERNNLayerEx &operator=(NERNNLayerEx &&) = default;
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that
- * multiplies the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies
- * the current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same
- * as @p input
- * @param[out] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *recurrent_weights,
- const ITensor *bias, ITensor *hidden_state, ITensor *output,
- ActivationLayerInfo &info);
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies
- * the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the
- * current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p
- * input
- * @param[in] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
- const ITensorInfo *hidden_state, const ITensorInfo *output,
- const ActivationLayerInfo &info);
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
- NEGEMM _gemm_state_f;
- NEArithmeticAdditionKernel _add_kernel;
- NEActivationLayerKernel _activation_kernel;
- NEFullyConnectedLayer _fully_connected_kernel;
- NECopyKernel _copy_kernel;
- Tensor _fully_connected_out;
- Tensor _gemm_output;
- Tensor _add_output;
- bool _is_prepared;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NERNNLAYER_EX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__
-#define __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
-#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to perform reduce operation */
-class NEReduceMeanEx : public IFunction
-{
-public:
- /** Constructor */
- NEReduceMeanEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
- * @param[in] reduction_axis Reduction axis vector.
- * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- */
- void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
- ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEReduceMeanEx
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
- * @param[in] reduction_axis Reduction axis vector.
- * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
- * @param[in] output Destination tensor. Data type supported: Same as @p input
- *
- * @return A status
- */
- static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis,
- bool keep_dims, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- std::unique_ptr<NEReductionOperation[]> _reduction_kernels{nullptr};
- std::unique_ptr<Tensor[]> _reduced_outs{nullptr};
- NEReshapeLayer _reshape;
- unsigned int _reduction_ops;
- bool _keep_dims;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__
-#define __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to spatial divide a tensor. This function calls the following NEON
- * kernels/functions:
- *
- * -# @ref NEMemsetKernel
- * -# @ref NESpaceToBatchLayerKernel
- */
-class NESpaceToBatchLayerEx : public IFunction
-{
-public:
- /** Default constructor */
- NESpaceToBatchLayerEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToBatchLayerEx(const NESpaceToBatchLayerEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToBatchLayerEx &operator=(const NESpaceToBatchLayerEx &) = delete;
- /** Allow instances of this class to be moved */
- NESpaceToBatchLayerEx(NESpaceToBatchLayerEx &&) = default;
- /** Allow instances of this class to be moved */
- NESpaceToBatchLayerEx &operator=(NESpaceToBatchLayerEx &&) = default;
- /** Default destructor */
- virtual ~NESpaceToBatchLayerEx() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings,
- ITensor *output);
- /** Set the input and output tensors. (Static block shape and paddings)
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const int block_shape_x, const int block_shape_y,
- const Size2D &padding_left, const Size2D &padding_right, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NESpaceToBatchLayerEx
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32
- * @param[in] paddings paddings tensor info with shape [2, M]. Data types supported: S32
- * @param[in] output Tensor output info. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape,
- const ITensorInfo *paddings, const ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NESpaceToBatchLayerEx (Static block shape and paddings)
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y,
- const Size2D &padding_left, const Size2D &padding_right,
- const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NESpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
- NEMemsetKernel _memset_kernel; /**< Memset kernel to run */
- bool _has_padding; /**< Flag to check if the output has padding */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__
-#define __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** This function calls the following NEON kernels/functions:
- *
- * -# @ref NESpaceToDepthLayerKernelEx
- */
-class NESpaceToDepthLayerEx : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NESpaceToDepthLayerEx (Static block shape and paddings)
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__ */
*/
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#ifndef __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
#define __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
-#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
+#include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+#include "arm_compute/runtime/NEON/functions/NEReverse.h"
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
{
/** Function to run the deconvolution layer.
*
- * Transpose convolution Layer is the backward pass of Convolution Layer. First we transform the
- * input depending on the stride and pad info and then perfrom a 1x1
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
+ * depending on the stride and pad info and then perform a 1x1
* convolution pass. Input stride defines how many zeroes we should put between each element of the
* input, pad is the amount of padding and finaly a is a user
* specified value where a < stride - 1 that increases the padding top and right of the input image.
* kernel_x and kernel_y are the convolution sizes in x and y.
* stride_x and stride_y is the input stride of the first and second dimension.
*
- * The weights used by Transpose convolution are supposed to be the same as the ones used for
- * Convolution. Therefore, it will be necessary to use the weights in the
- * reverse order to perform an actual convolution. This is achieved by using the @ref
- * CPPFlipWeightsKernel.
+ * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
+ * Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse.
*
* This function calls the following NEON kernels/functions:
*
 * -# @ref CPPUpsample
 * -# @ref NEConvolutionLayer
+ * -# @ref NEReverse
*
*/
class NETransposeConvLayer : public IFunction
{
public:
- /** Default constructor */
+ /** Constructor */
NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
/** Set the input, weights, biases and output tensors.
*
* @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
+ * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
+ * supported: Same as @p input.
* @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type
- * supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+ * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
* @param[out] output Output tensor. The output has the same number of dimensions as the @p
- * input.
+ * input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
+ * described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
const PadStrideInfo &info, unsigned int invalid_right,
unsigned int invalid_bottom);
/** Static function to check if given info will lead to a valid configuration of @ref
- * NETransposeConvLayer
+ * NETransposeConvLayer
*
* @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
+ * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
* @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
+ * supported: Same as @p input.
* @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types
- * supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+ * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
* @param[in] output Output tensor info. The output has the same number of dimensions as the @p
- * input.
+ * input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] innvalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
+ * described in @ref PadStrideInfo.
+ * @param[in] innvalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
* @return a status
*/
private:
MemoryGroup _memory_group;
NEConvolutionLayer _conv_f;
- CPPUpsampleEx _upsample_f;
- CPPFlipWeightsKernel _flip_weights;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
+ CPPUpsample _upsample_f;
+ NEReverse _flip_weights;
Tensor _scaled_output;
Tensor _weights_flipped;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_nchw;
+ Tensor _flip_axis;
const ITensor *_original_weights;
ITensor *_input;
PadStrideInfo _info;
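/*
 * Editorial sketch (not part of the patch): the doc comment above describes transpose
 * convolution as inserting zeroes between input elements according to the stride before
 * running an ordinary convolution. A minimal 1-D illustration of that upsampling step,
 * with an assumed std::vector representation and a hypothetical helper name:
 */
#include <cstddef>
#include <vector>

// Insert (stride - 1) zeros between consecutive samples, as the upsample stage of a
// transpose convolution would do before the convolution pass.
std::vector<float> upsample_1d(const std::vector<float> &input, int stride)
{
  if (input.empty() || stride <= 1)
    return input;
  std::vector<float> out((input.size() - 1) * stride + 1, 0.0f);
  for (std::size_t i = 0; i < input.size(); ++i)
    out[i * stride] = input[i]; // original samples land on multiples of the stride
  return out;
}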
const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = {
// ARMComputeEx kernels
- {"arg_op", "arg_operation.cl"},
- {"arithmetic_add_qasymm8", "arithmetic_op_quantized.cl"},
{"binary_logical_op", "binary_logical_op.cl"},
- {"cast", "cast.cl"},
- {"cast_qasymm_in", "cast.cl"},
- {"cast_qasymm_out", "cast.cl"},
- {"comparison_op", "comparison_op.cl"},
- {"comparison_op_qasymm8", "comparison_op_quantized.cl"},
- {"depth_to_space_nchw", "depth_to_space.cl"},
- {"depth_to_space_nhwc", "depth_to_space.cl"},
{"embedding_lookup", "embedding_lookup.cl"},
{"gather_ex", "gather_ex.cl"},
{"gather_ex_1d", "gather_ex.cl"},
{"instance_normalization_ex", "instance_normalization_ex.cl"},
{"multiply_scale_factor", "multiply_scale_factor.cl"},
{"neg_tensor", "neg_tensor.cl"},
- {"permute_generic", "permute_ex.cl"},
- {"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"},
- {"prelu", "prelu.cl"},
- {"prelu_qasymm8", "prelu_quantized.cl"},
{"quantization_symm8", "quantization_symm8.cl"},
{"reduce_min_max", "reduce_operation.cl"},
{"reduce_sum_mean", "reduce_operation.cl"},
{"radixsort_reorder", "topkv2_radixsort.cl"},
{"topkv2_quicksort", "topkv2_quicksort.cl"},
{"scale_factor_symm8", "scale_factor.cl"},
- {"space_to_depth_nchw", "space_to_depth.cl"},
- {"space_to_depth_nhwc", "space_to_depth.cl"},
};
const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = {
#ifdef EMBEDDED_KERNELS
{
- "arg_operation.cl",
-#include "./cl_kernels/arg_operation.clembed"
- },
- {
- "cast.cl",
-#include "./cl_kernels/cast.clembed"
- },
- {
"embedding_lookup.cl",
#include "./cl_kernels/embedding_lookup.clembed"
},
{
- "depth_to_space.cl",
-#include "./cl_kernels/depth_to_space.clembed"
- },
- {
"gather_ex.cl",
#include "./cl_kernels/gather_ex.clembed"
},
#include "./cl_kernels/neg_tensor.clembed"
},
{
- "prelu.cl",
-#include "./cl_kernels/prelu.clembed"
- },
- {
- "prelu_quantized.cl",
-#include "./cl_kernels/prelu_quantized.clembed"
- },
- {
"quantization_symm8.cl",
#include "./cl_kernels/quantization_symm8.clembed"
},
#include "./cl_kernels/scale_factor.clembed"
},
{
- "space_to_depth.cl",
-#include "./cl_kernels/space_to_depth.clembed"
- },
- {
"topkv2.cl",
#include "./cl_kernels/topkv2.clembed"
},
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
-/** Perform arg_max/arg_min
- *
- * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type.
- * e.g. -DDATA_TYPE=short
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
- * e.g. -DDEPTH_OUT=16
- * @attention Operation type(code) specifying which operation to perform should be passed as
- * preprocessor argument using -DOP_CODE = number. e.g. -DOP_CODE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types:
- * U8/QASYMM8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension
- * (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension
- * (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension
- * (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element
- * in the source image
- * @param[in] input_stride_w Stride of the source tensor in W dimension
- * (in bytes)
- * @param[in] input_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[out] output_ptr Pointer to the destination image.
- * Supported data types: U32
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension
- * (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension
- * (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- * @param[in] axis Axis through which reduction occurs
- * @param[in] dim Dimension across the axis to be reduced.
- */
-
-__kernel void arg_op(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output), const int axis,
- const int dim)
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
- int indices[4] = {
- get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
- get_global_id(2) / DEPTH_OUT,
- };
-
- DATA_TYPE value =
- *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
- DATA_TYPE tval = value;
- int idx = 0;
- for (int i = 1; i < dim; ++i)
- {
- indices[axis] = i;
-
-#if OP_CODE == 1 // ArgMax
- value = max(value, *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1],
- indices[2], indices[3])));
-#elif OP_CODE == 2 // ArgMin
- value = min(value, *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1],
- indices[2], indices[3])));
-#else
- return;
-
-#endif
-
- if (tval != value)
- {
- idx = indices[axis];
- tval = value;
- }
- }
-
- *((__global uint *)out.ptr) = idx;
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
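/*
 * Editorial sketch (not part of the patch): the removed arg_op kernel walks one axis,
 * keeps a running max (or min), and records the index at which the running value last
 * changed. A host-side scalar equivalent of the ArgMax branch, with hypothetical names:
 */
#include <algorithm>
#include <cstddef>

// Index of the maximum element over a contiguous slice of length dim.
std::size_t arg_max_1d(const float *slice, std::size_t dim)
{
  float best = slice[0];
  std::size_t idx = 0;
  for (std::size_t i = 1; i < dim; ++i)
  {
    const float value = std::max(best, slice[i]);
    if (value != best) // running maximum changed, remember where
    {
      idx = i;
      best = value;
    }
  }
  return idx;
}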
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers_asymm.h"
-
-#ifdef SATURATE
-#define ADD(x, y) add_sat((x), (y))
-#define SUB(x, y) sub_sat((x), (y))
-#else /* SATURATE */
-#define ADD(x, y) (x) + (y)
-#define SUB(x, y) (x) - (y)
-#endif /* SATURATE */
-
-/** Performs a pixelwise addition used to quantize down the int32 accumulator values of GEMMLowp to
- * QASYMM8
- *
- * The following computations will be performed:
- *
- * -# Add offset terms to inputs
- -# Get scaled value of two inputs
- * -# Add inputs
- * -# Add offset terms to final result
- * -# Multiply each entry of result by result_mult_int
- * -# Shift the int32 accumulator by result_shift
- * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8.
- *
- * @attention The inputs and output data types need to be passed at compile time using
- * -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar
- * @attention The number of bits to shift left of input tensors must be passed at compile time using
- * -DLEFT_SHIFT
- * @attention The offset, scalar scale factor and number of bits to shift right of input tensors
- * must be passed at compile time using -DIN1_OFFSET, -RIN1_MULT_INT, -DIN1_SHIFT,
- -DIN2_OFFSET,
- * -RIN2_MULT_INT and -DIN2_SHIFT
- * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
- * must be passed at compile time using -DRESULT_OFFSET, -RESULT_MULT_INT and
- -DRESULT_SHIFT
- *
- * @attention The input and output data_types need to be passed at compile time using
- * -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar
- * @attention The inputs and output scale information of qasymm8 need to be passed at compile time
- * using -DSCALE_IN1, -DSCALE_IN2 and -DSCALE_OUT:
- * e.g. -DSCALE_IN1=1.f -DSCALE_IN2=1.f -DSCALE_OUT=2.f
- * @attention The inputs and output scale offset need to be passed at compile time using
- * -DOFFSET_IN1, -DOFFSET_IN2 and -DOFFSET_OUT:
- * e.g. -DOFFSET_IN1=0 -DOFFSET_IN2=0 -DOFFSET_OUT=0
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise
- * wrapping policy will be used.
- *
- * @param[in] in1_ptr Pointer to the source tensor.
- * Supported data types: QASYMM8
- * @param[in] in1_stride_x Stride of the source tensor in X dimension
- * (in bytes)
- * @param[in] in1_step_x in1_stride_x * number of elements along X processed
- * per workitem(in bytes)
- * @param[in] in1_stride_y Stride of the source tensor in Y dimension
- * (in bytes)
- * @param[in] in1_step_y in1_stride_y * number of elements along Y processed
- * per workitem(in bytes)
- * @param[in] in1_stride_z Stride of the source tensor in Z dimension
- * (in bytes)
- * @param[in] in1_step_z in1_stride_z * number of elements along Z processed
- * per workitem(in bytes)
- * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source
- * tensor
- * @param[in] in2_ptr Pointer to the source tensor. Supported data types:
- * QASYMM8
- * @param[in] in2_stride_x Stride of the source tensor in X dimension
- * (in bytes)
- * @param[in] in2_step_x in2_stride_x * number of elements along X processed
- * per workitem(in bytes)
- * @param[in] in2_stride_y Stride of the source tensor in Y dimension
- * (in bytes)
- * @param[in] in2_step_y in2_stride_y * number of elements along Y processed
- * per workitem(in bytes)
- * @param[in] in2_stride_z Stride of the source tensor in Z dimension
- * (in bytes)
- * @param[in] in2_step_z in2_stride_z * number of elements along Z processed
- * per workitem(in bytes)
- * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source
- * tensor
- * @param[out] out_ptr Pointer to the destination tensor.
- * Supported data types: QASYMM8
- * @param[in] out_stride_x Stride of the destination tensor in X dimension
- * (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed
- * per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination tensor in Y dimension
- * (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed
- * per workitem(in bytes)
- * @param[in] out_stride_z Stride of the source tensor in Z dimension
- * (in bytes)
- * @param[in] out_step_z out_stride_z * number of elements along Z processed
- * per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination
- * tensor
- */
-__kernel void arithmetic_add_qasymm8(TENSOR3D_DECLARATION(in1), TENSOR3D_DECLARATION(in2),
- TENSOR3D_DECLARATION(out))
-{
- // Get pixels pointer
- Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
- Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
-
- // Load data
- VEC_DATA_TYPE(int, 16)
- in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(int, 16));
- VEC_DATA_TYPE(int, 16)
- in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(int, 16));
-
- // Get scaled value of two inputs
- VEC_DATA_TYPE(int, 16) in1_val = in1_data + (VEC_DATA_TYPE(int, 16))(IN1_OFFSET);
- VEC_DATA_TYPE(int, 16) in2_val = in2_data + (VEC_DATA_TYPE(int, 16))(IN2_OFFSET);
-
- VEC_DATA_TYPE(int, 16)
- left_shift = (VEC_DATA_TYPE(int, 16))1 << (VEC_DATA_TYPE(int, 16))(LEFT_SHIFT);
- VEC_DATA_TYPE(int, 16) shifted_in1_val = in1_val * left_shift;
- VEC_DATA_TYPE(int, 16) shifted_in2_val = in2_val * left_shift;
-
- VEC_DATA_TYPE(int, 16)
- scaled_in1_val =
- ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(shifted_in1_val, IN1_MULT_INT, IN1_SHIFT, 16);
- VEC_DATA_TYPE(int, 16)
- scaled_in2_val =
- ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(shifted_in2_val, IN2_MULT_INT, IN2_SHIFT, 16);
-
- // Add inputs and multiply with a multiplier smaller than 1
- VEC_DATA_TYPE(int, 16) sum_val = scaled_in1_val + scaled_in2_val;
- VEC_DATA_TYPE(int, 16)
- out_val =
- ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(sum_val, RESULT_MULT_INT, RESULT_SHIFT, 16);
- out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET);
-
- VEC_DATA_TYPE(uchar, 16) res = CONVERT(out_val, VEC_DATA_TYPE(uchar, 16));
-
- // TODO: Apply min-max BOUND to support fuse with relu.
- /*
- #if defined(MIN_BOUND)
- res = max(res, (uchar16)MIN_BOUND);
- #endif // defined(MIN_BOUND)
- #if defined(MAX_BOUND)
- res = min(res, (uchar16)MAX_BOUND);
- #endif // defined(MAX_BOUND)
- */
-
- // Store result
- VSTORE(16)(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
-}
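/*
 * Editorial sketch (not part of the patch): the removed arithmetic_add_qasymm8 kernel
 * rescales both QASYMM8 operands to a common scale before summing and requantizing. A
 * simplified float reference of the same idea (it skips the fixed-point multiplier path
 * the kernel used; names and the QuantParams struct are illustrative only):
 */
#include <algorithm>
#include <cmath>
#include <cstdint>

struct QuantParams
{
  float scale;
  int32_t offset;
};

uint8_t add_qasymm8_ref(uint8_t a, QuantParams qa, uint8_t b, QuantParams qb, QuantParams qo)
{
  // Dequantize both operands, add in float, then requantize to the output parameters.
  const float fa = (static_cast<int32_t>(a) - qa.offset) * qa.scale;
  const float fb = (static_cast<int32_t>(b) - qb.offset) * qb.scale;
  int32_t q = static_cast<int32_t>(std::lround((fa + fb) / qo.scale)) + qo.offset;
  q = std::max<int32_t>(0, std::min<int32_t>(255, q)); // clamp to the QASYMM8 range
  return static_cast<uint8_t>(q);
}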
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-
-#ifndef SCALE
-#define SCALE 1.0f
-#endif
-#ifndef OFFSET
-#define OFFSET 0
-#endif
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
-/** Perform a cast operation on an input tensor.
- *
- * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and
- * -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- * @attention -DBOOL_INPUT : Whether type of input is bool.
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: F16/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void cast(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output))
-{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- VSTORE(VEC_SIZE)
- (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr),
- VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)),
- 0, (__global DATA_TYPE_OUT *)output.ptr);
- VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)
- res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr),
- VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE));
-#if defined(BOOL_INPUT)
- VEC_DATA_TYPE(char, VEC_SIZE) tmp = CONVERT(res, VEC_DATA_TYPE(char, VEC_SIZE));
- VEC_DATA_TYPE(char, VEC_SIZE) mask = (VEC_DATA_TYPE(char, VEC_SIZE))(1);
- res = CONVERT(tmp & mask, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE));
-#endif // defined(BOOL_INPUT)
-
- VSTORE(VEC_SIZE)(res, 0, (__global DATA_TYPE_OUT *)output.ptr);
-}
-
-/** Perform a cast operation on an QASYMM8 input tensor.
- * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and
- * -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int
- * @attention Offset and Scale of input should be given as a preprocessor argument using
- * -DOFFSET=int, -DSCALE=float. e.g. -DOFFSET=1, -DSCALE=0.5
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: F16/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void cast_qasymm_in(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output))
-{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE)
- in_data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr);
- VEC_DATA_TYPE(int, VEC_SIZE) offset = (VEC_DATA_TYPE(int, VEC_SIZE))(OFFSET);
- VEC_DATA_TYPE(float, VEC_SIZE) scale = (VEC_DATA_TYPE(float, VEC_SIZE))(SCALE);
-
- VEC_DATA_TYPE(int, VEC_SIZE) tmp = CONVERT(in_data, VEC_DATA_TYPE(int, VEC_SIZE)) - offset;
- VEC_DATA_TYPE(float, VEC_SIZE) out_data = CONVERT(tmp, VEC_DATA_TYPE(float, VEC_SIZE)) * scale;
-
- VSTORE(VEC_SIZE)
- (CONVERT(out_data, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0,
- (__global DATA_TYPE_OUT *)output.ptr);
-}
-
-/** Perform a cast operation on an QASYMM8 output tensor.
- * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and
- * -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int
- * @attention Offset and Scale of output should be given as a preprocessor argument using
- * -DOFFSET=int, -DSCALE=float. e.g. -DOFFSET=1, -DSCALE=0.5
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: F16/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: U8
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void cast_qasymm_out(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output))
-{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE)
- in_data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr);
- VEC_DATA_TYPE(int, VEC_SIZE) offset = (VEC_DATA_TYPE(int, VEC_SIZE))(OFFSET);
- VEC_DATA_TYPE(float, VEC_SIZE) scale = (VEC_DATA_TYPE(float, VEC_SIZE))(SCALE);
-
- VEC_DATA_TYPE(float, VEC_SIZE) tmp = CONVERT(in_data, VEC_DATA_TYPE(float, VEC_SIZE)) / scale;
- VEC_DATA_TYPE(float, VEC_SIZE) out_data = tmp + CONVERT(offset, VEC_DATA_TYPE(float, VEC_SIZE));
-
- VSTORE(VEC_SIZE)
- (CONVERT(out_data, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0,
- (__global DATA_TYPE_OUT *)output.ptr);
-}
-#endif // defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT)
-/** Perform space to depth rearrangement of tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
- * e.g. -DDEPTH_OUT=16
- * @attention The value of the z-axis of output tensor should be given as a preprocessor argument
- * using -DZ_OUT=size. e.g. -DZ_OUT=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g.
- * -DBLOCK_SIZE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in
- * bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void depth_to_space_nchw(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, Z_OUT);
-
- int out_index[4] = {0};
- int in_index[4] = {0};
-
- out_index[0] = get_global_id(0); // W
- out_index[1] = get_global_id(1); // H
- out_index[2] = get_global_id(2) % Z_OUT; // C
- out_index[3] = get_global_id(2) / Z_OUT; // B
-
- in_index[0] = out_index[0] / BLOCK_SIZE;
- in_index[1] = out_index[1] / BLOCK_SIZE;
- in_index[2] = out_index[2] +
- ((out_index[1] % BLOCK_SIZE) * BLOCK_SIZE + out_index[0] % BLOCK_SIZE) * DEPTH_OUT;
- in_index[3] = out_index[3];
-
- *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(
- &in, in_index[0], in_index[1], in_index[2], in_index[3]));
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT)
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT)
-/** Perform space to depth rearrangement of tensor (NHWC)
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
- * e.g. -DDEPTH_OUT=16
- * @attention The value of the z-axis of output tensor should be given as a preprocessor argument
- * using -DZ_OUT=size. e.g. -DZ_OUT=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g.
- * -DBLOCK_SIZE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in
- * bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void depth_to_space_nhwc(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, Z_OUT);
-
- int out_index[4] = {0};
- int in_index[4] = {0};
-
- out_index[0] = get_global_id(0); // C
- out_index[1] = get_global_id(1); // W
- out_index[2] = get_global_id(2) % Z_OUT; // H
- out_index[3] = get_global_id(2) / Z_OUT; // B
-
- in_index[0] = out_index[0] +
- ((out_index[2] % BLOCK_SIZE) * BLOCK_SIZE + out_index[1] % BLOCK_SIZE) * DEPTH_OUT;
- in_index[1] = out_index[1] / BLOCK_SIZE;
- in_index[2] = out_index[2] / BLOCK_SIZE;
- in_index[3] = out_index[3];
-
- *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(
- &in, in_index[0], in_index[1], in_index[2], in_index[3]));
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT)
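/*
 * Editorial sketch (not part of the patch): the removed depth_to_space_nhwc kernel maps
 * each output coordinate back to the input coordinate it reads from, rather than
 * scattering inputs forward. The same index arithmetic for one NHWC element, with an
 * illustrative Coord4 struct:
 */
struct Coord4
{
  int c, w, h, b;
};

// block is the depth-to-space block size, depth_out the number of output channels.
Coord4 depth_to_space_src_coord(const Coord4 &out, int block, int depth_out)
{
  Coord4 in;
  in.c = out.c + ((out.h % block) * block + out.w % block) * depth_out;
  in.w = out.w / block;
  in.h = out.h / block;
  in.b = out.b;
  return in;
}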
*/
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#ifndef ARM_COMPUTE_HELPER_H
#define ARM_COMPUTE_HELPER_H
#pragma OPENCL EXTENSION cl_arm_printf : enable
#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
+#define GPU_ARCH_MIDGARD 0x100
+#define GPU_ARCH_BIFROST 0x200
+
+/** Concatenate two inputs.
+ *
+ * @param[in] a The first input to be concatenated
+ * @param[in] b The second input to be concatenated
+ *
+ * @return The concatenated output
+ */
+#define CONCAT(a, b) a##b
+
+/** Expand the given vector
+ *
+ * @param[in] x The vector to be expanded
+ *
+ * @return The expanded output
+ */
#define EXPAND(x) x
+/** Clamp the given value between an upper and lower bound.
+ *
+ * @param[in] x The value to be clamped
+ * @param[in] min_val The lower bound
+ * @param[in] max_val The upper bound
+ *
+ * @return The clamped value.
+ */
#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
+/** REVn reverses the given vector whose size is n.
+ * @name REVn
+ *
+ * @param[in] x The vector to be reversed
+ *
+ * @return The reversed vector
+ * @{
+ */
+#define REV1(x) ((x))
+#define REV2(x) ((x).s10)
+#define REV3(x) ((x).s210)
+#define REV4(x) ((x).s3210)
+#define REV8(x) ((x).s76543210)
+#define REV16(x) ((x).sFEDCBA9876543210)
+/** @} */ // end of group REVn
+
+/** Reverse the given vector.
+ * @name REVERSE
+ *
+ * @param[in] x The vector to be reversed
+ * @param[in] s The size of the vector
+ *
+ * @return The reversed vector
+ * @{
+ */
+#define REVERSE_STR(x, s) REV##s((x))
+#define REVERSE(x, s) REVERSE_STR(x, s)
+/** @} */ // end of group REVERSE
+
+/** Circular-right-shift (rotate-right) the vector of size s by the amount of n.
+ * @name ROTs_n
+ *
+ * @param[in] x The vector to be shifted
+ *
+ * @return The shifted vector
+ * @{
+ */
+#define ROT1_0(x) ((x))
+
+#define ROT2_0(x) ((x))
+#define ROT2_1(x) ((x).s10)
+
+#define ROT3_0(x) ((x))
+#define ROT3_1(x) ((x).s201)
+#define ROT3_2(x) ((x).s120)
+
+#define ROT4_0(x) ((x))
+#define ROT4_1(x) ((x).s3012)
+#define ROT4_2(x) ((x).s2301)
+#define ROT4_3(x) ((x).s1230)
+
+#define ROT8_0(x) ((x))
+#define ROT8_1(x) ((x).s70123456)
+#define ROT8_2(x) ((x).s67012345)
+#define ROT8_3(x) ((x).s56701234)
+#define ROT8_4(x) ((x).s45670123)
+#define ROT8_5(x) ((x).s34567012)
+#define ROT8_6(x) ((x).s23456701)
+#define ROT8_7(x) ((x).s12345670)
+
+#define ROT16_0(x) ((x))
+#define ROT16_1(x) ((x).sF0123456789ABCDE)
+#define ROT16_2(x) ((x).sEF0123456789ABCD)
+#define ROT16_3(x) ((x).sDEF0123456789ABC)
+#define ROT16_4(x) ((x).sCDEF0123456789AB)
+#define ROT16_5(x) ((x).sBCDEF0123456789A)
+#define ROT16_6(x) ((x).sABCDEF0123456789)
+#define ROT16_7(x) ((x).s9ABCDEF012345678)
+#define ROT16_8(x) ((x).s89ABCDEF01234567)
+#define ROT16_9(x) ((x).s789ABCDEF0123456)
+#define ROT16_10(x) ((x).s6789ABCDEF012345)
+#define ROT16_11(x) ((x).s56789ABCDEF01234)
+#define ROT16_12(x) ((x).s456789ABCDEF0123)
+#define ROT16_13(x) ((x).s3456789ABCDEF012)
+#define ROT16_14(x) ((x).s23456789ABCDEF01)
+#define ROT16_15(x) ((x).s123456789ABCDEF0)
+/** @} */ // end of group ROTs_n
+
+/** Circular-right-shift (rotate-right) the given vector by the given amount.
+ * @name ROTATE
+ *
+ * @param[in] x The vector to be shifted
+ * @param[in] s The size of the vector
+ * @param[in] n The amount to be shifted
+ *
+ * @return The shifted vector
+ * @{
+ */
+#define ROTATE_STR(x, s, n) ROT##s##_##n(x)
+#define ROTATE(x, s, n) ROTATE_STR(x, s, n)
+/** @} */ // end of group ROTATE
+
+/** Creates a vector of size n filled with offset values corresponding to the location of each
+ * element.
+ * @name V_OFFSn
+ *
+ * @param[in] dt The data type of the output vector
+ *
+ * @return The vector filled with offset values
+ * @{
+ */
+#define V_OFFS1(dt) (dt)(0)
+#define V_OFFS2(dt) (dt)(0, 1)
+#define V_OFFS3(dt) (dt)(0, 1, 2)
+#define V_OFFS4(dt) (dt)(0, 1, 2, 3)
+#define V_OFFS8(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7)
+#define V_OFFS16(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
+/** @} */ // end of group V_OFFSn
+
+/** Create a vector filled with offset values corresponding to the location of each element.
+ * @name VEC_OFFS
+ *
+ * @param[in] dt The data type of the output vector
+ * @param[in] s The size of the output vector
+ *
+ * @return The vector filled with offset values
+ * @{
+ */
+#define VEC_OFFS_STR(dt, s) V_OFFS##s(dt)
+#define VEC_OFFS(dt, s) VEC_OFFS_STR(dt, s)
+/** @} */ // end of group VEC_OFFS
+
#define VLOAD_STR(size) vload##size
#define VLOAD(size) VLOAD_STR(size)
#define VSTORE_STR(size) vstore##size
#define VSTORE(size) VSTORE_STR(size)
+#define float1 float
+#define half1 half
+#define char1 char
+#define uchar1 uchar
+#define short1 short
+#define ushort1 ushort
+#define int1 int
+#define uint1 uint
+#define long1 long
+#define ulong1 ulong
+#define double1 double
+
+#define vload1(OFFSET, PTR) *(OFFSET + PTR)
+#define vstore1(DATA, OFFSET, PTR) *(OFFSET + PTR) = DATA
+
+// The convert_* built-ins with the _sat modifier are not supported for floating point,
+// so we define the _sat names as aliases of the plain conversions to work around this.
+#define convert_float_sat convert_float
+#define convert_float1_sat convert_float
+#define convert_float2_sat convert_float2
+#define convert_float3_sat convert_float3
+#define convert_float4_sat convert_float4
+#define convert_float8_sat convert_float8
+#define convert_float16_sat convert_float16
+#define convert_half_sat convert_float
+#define convert_half1_sat convert_half
+#define convert_half2_sat convert_half2
+#define convert_half3_sat convert_half3
+#define convert_half4_sat convert_half4
+#define convert_half8_sat convert_half8
+#define convert_half16_sat convert_half16
+
+#define convert_float1 convert_float
+#define convert_half1 convert_half
+#define convert_char1 convert_char
+#define convert_uchar1 convert_uchar
+#define convert_short1 convert_short
+#define convert_ushort1 convert_ushort
+#define convert_int1 convert_int
+#define convert_uint1 convert_uint
+#define convert_long1 convert_long
+#define convert_ulong1 convert_ulong
+#define convert_double1 convert_double
+
+#define convert_char1_sat convert_char_sat
+#define convert_uchar1_sat convert_uchar_sat
+#define convert_short1_sat convert_short_sat
+#define convert_ushort1_sat convert_ushort_sat
+#define convert_int1_sat convert_int_sat
+#define convert_uint1_sat convert_uint_sat
+#define convert_long1_sat convert_long_sat
+#define convert_ulong1_sat convert_ulong_sat
+#define convert_double1_sat convert_double_sat
+
#define VEC_DATA_TYPE_STR(type, size) type##size
#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
*/
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#ifndef ARM_COMPUTE_HELPERS_ASYMM_H
#define ARM_COMPUTE_HELPERS_ASYMM_H
#include "helpers.h"
+/** Convert the given vector with round to nearest even rounding mode
+ *
+ * @param[in] x The target to be converted
+ * @param[in] type The target type
+ *
+ * @return The converted vector
+ */
+#define CONVERT_DOWN_RTE_STR(x, type) (convert_##type##_rte((x)))
+#define CONVERT_DOWN_RTE(x, type) CONVERT_DOWN_RTE_STR(x, type)
+
+/** Quantize a floating-point scalar value to 8-bit asymmetric
+ *
+ * @param[in] input Input value to quantize
+ * @param[in] offset Quantization offset
+ * @param[in] scale Quantization scale
+ *
+ * @return quantized value
+ */
+inline uchar quantize_qasymm8(float input, float offset, float scale)
+{
+ float out_f32 = input / scale + offset;
+ uchar res_u8 = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, int), uchar);
+ return res_u8;
+}
+
+/** Dequantize a scalar value from 8-bit asymmetric to floating-point
+ *
+ * @param[in] input Input value to dequantize
+ * @param[in] offset Quantization offset
+ * @param[in] scale Quantization scale
+ *
+ * @return dequantized value
+ */
+inline float dequantize_qasymm8(uchar input, float offset, float scale)
+{
+ return ((float)input - offset) * scale;
+}
+
+/** Dequantize a scalar value from signed 8-bit asymmetric to floating-point
+ *
+ * @param[in] input Input value to dequantize
+ * @param[in] offset Quantization offset
+ * @param[in] scale Quantization scale
+ *
+ * @return dequantized value
+ */
+inline float dequantize_qasymm8_signed(char input, float offset, float scale)
+{
+ return ((float)input - offset) * scale;
+}
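/*
 * Editorial sketch (not part of the patch): a scalar round trip through the helpers
 * above. With scale = 0.5 and offset = 128, the value 3.2f quantizes to
 * round(3.2 / 0.5) + 128 = 134 and dequantizes back to (134 - 128) * 0.5 = 3.0f; the
 * 0.2 is lost to rounding. A standalone C++ equivalent (std::lround approximates the
 * round-to-nearest-even conversion used above; names are illustrative):
 */
#include <algorithm>
#include <cmath>
#include <cstdint>

uint8_t quantize_u8(float v, float scale, float offset)
{
  int32_t q = static_cast<int32_t>(std::lround(v / scale + offset));
  return static_cast<uint8_t>(std::max<int32_t>(0, std::min<int32_t>(255, q)));
}

float dequantize_u8(uint8_t q, float scale, float offset)
{
  return (static_cast<float>(q) - offset) * scale;
}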
+
+/** Quantize a vector of values from floating-point
+ *
+ * @param[in] type Output data type.
+ * @param[in] size Size of vector.
+ *
+ * @return quantized values
+ */
+#define QUANTIZE_IMPL(type, size) \
+ inline VEC_DATA_TYPE(type, size) \
+ quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \
+ { \
+ VEC_DATA_TYPE(float, size) \
+ out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \
+ VEC_DATA_TYPE(type, size) \
+ res = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), \
+ VEC_DATA_TYPE(type, size)); \
+ return res; \
+ }
+
+/** Dequantize a vector of values to floating-point
+ *
+ * @param[in] type Input data type.
+ * @param[in] size Size of vector.
+ *
+ * @return dequantized values in floating point
+ */
+#define DEQUANTIZE_IMPL(type, size) \
+ inline VEC_DATA_TYPE(float, size) \
+ dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \
+ { \
+ return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \
+ }
+
/** Correctly-rounded-to-nearest division by a power-of-two.
*
* @param[in] size Size of vector.
*
* @return Correctly-rounded-to-nearest division by a power-of-two.
*/
-#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_rounding_divide_by_POW2_##size(VEC_DATA_TYPE(int, size) x, int exponent) \
- { \
- VEC_DATA_TYPE(int, size) \
- mask = (1 << exponent) - 1; \
- const VEC_DATA_TYPE(int, size) zero = 0; \
- const VEC_DATA_TYPE(int, size) one = 1; \
- VEC_DATA_TYPE(int, size) \
- threshold = (mask >> 1) + select(zero, one, x < 0); \
- return (x >> exponent) + select(zero, one, (x & mask) > threshold); \
+#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) asymm_rounding_divide_by_POW2_##size( \
+ VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \
+ { \
+ const VEC_DATA_TYPE(int, size) zero = (VEC_DATA_TYPE(int, size))0; \
+ const VEC_DATA_TYPE(int, size) one = (VEC_DATA_TYPE(int, size))1; \
+ VEC_DATA_TYPE(int, size) \
+ mask = (one << exponent) - one; \
+ VEC_DATA_TYPE(int, size) \
+ threshold = (mask >> 1) + select(zero, one, x < 0); \
+ return (x >> exponent) + select(zero, one, (x & mask) > threshold); \
}
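/*
 * Editorial sketch (not part of the patch): scalar form of the rounding divide above.
 * The low bits discarded by the arithmetic shift are compared against half the divisor
 * (biased by one for negative inputs) so the result rounds to nearest rather than
 * towards negative infinity.
 */
#include <cstdint>

int32_t rounding_divide_by_pow2(int32_t x, int exponent)
{
  const int32_t mask = (int32_t{1} << exponent) - 1;
  const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
  return (x >> exponent) + (((x & mask) > threshold) ? 1 : 0);
}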
/** Product of two numbers, interpreting them as fixed-point values in the interval [-1, 1),
b_64 = convert_long##size(b); \
VEC_DATA_TYPE(long, size) \
ab_64 = a_64 * b_64; \
- /* COMPMID-907 */ \
+ /* Revert COMPMID-907 */ \
+ VEC_DATA_TYPE(long, size) \
+ mask1 = 1 << 30; \
+ VEC_DATA_TYPE(long, size) \
+ mask2 = 1 - (1 << 30); \
+ VEC_DATA_TYPE(long, size) \
+ is_positive_or_zero = ab_64 >= 0; \
+ VEC_DATA_TYPE(long, size) \
+ nudge = select(mask2, mask1, is_positive_or_zero); \
+ VEC_DATA_TYPE(long, size) \
+ mask = 1ll << 31; \
VEC_DATA_TYPE(int, size) \
- ab_x2_high32 = convert_int##size(((ab_64 + (1 << 30)) >> 31)); \
+ ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \
return select(ab_x2_high32, INT_MAX, overflow); \
}
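/*
 * Editorial sketch (not part of the patch): scalar form of the fixed-point multiply
 * after this change. The 64-bit product is nudged by +/- 2^30 depending on its sign and
 * divided by 2^31, giving the high half of the doubled product rounded to nearest
 * (the behaviour of gemmlowp's SaturatingRoundingDoublingHighMul).
 */
#include <cstdint>
#include <limits>

int32_t saturating_rounding_doubling_high_mul(int32_t a, int32_t b)
{
  const bool overflow = (a == b) && (a == std::numeric_limits<int32_t>::min());
  const int64_t ab_64 = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  const int64_t nudge = ab_64 >= 0 ? (int64_t{1} << 30) : 1 - (int64_t{1} << 30);
  const int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (int64_t{1} << 31));
  return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
}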
return ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(value, exponent, size); \
}
+#define QUANTIZE_STR(input, offset, scale, type, size) quantize_##type##size(input, offset, scale)
+#define QUANTIZE(input, offset, scale, type, size) QUANTIZE_STR(input, offset, scale, type, size)
+#define DEQUANTIZE_STR(input, offset, scale, type, size) \
+ dequantize_##type##size(input, offset, scale)
+#define DEQUANTIZE(input, offset, scale, type, size) \
+ DEQUANTIZE_STR(input, offset, scale, type, size)
+
#define ASYMM_ROUNDING_DIVIDE_BY_POW2(x, exponent, size) \
asymm_rounding_divide_by_POW2_##size(x, exponent)
#define ASYMM_MULT(a, b, size) asymm_mult##size(a, b)
+#define ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(x, quantized_multiplier, left_shift, size) \
+ ASYMM_MULT(x *((VEC_DATA_TYPE(int, size))(1) << (-left_shift)), quantized_multiplier, size)
#define ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(x, quantized_multiplier, right_shift, size) \
ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(x, quantized_multiplier, size), right_shift, size)
#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL(a, size) \
#define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) \
asymm_rescale##size(value, src_integer_bits, dst_integer_bits)
+#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \
+ { \
+ const int left_shift = shift > 0 ? shift : 0; \
+ const int right_shift = shift > 0 ? 0 : -shift; \
+ return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \
+ right_shift, size); \
+ }
+#define MULTIPLY_BY_QUANTIZED_MULTIPLIER(input, qmul, shift, size) \
+ multiply_by_quantized_multiplier##size(input, qmul, shift)
+
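/*
 * Editorial sketch (not part of the patch): multiply_by_quantized_multiplier takes one
 * signed shift and splits it in two, a positive shift becomes a pre-multiplication by
 * 2^shift, a negative one a rounding right shift after the high multiply. Scalar form,
 * reusing the two helpers sketched earlier (declared here for reference only):
 */
#include <cstdint>

int32_t saturating_rounding_doubling_high_mul(int32_t a, int32_t b); // sketched above
int32_t rounding_divide_by_pow2(int32_t x, int exponent);            // sketched above

int32_t multiply_by_quantized_multiplier_ref(int32_t input, int32_t qmul, int shift)
{
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  const int32_t scaled = saturating_rounding_doubling_high_mul(input * (1 << left_shift), qmul);
  return rounding_divide_by_pow2(scaled, right_shift);
}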
+QUANTIZE_IMPL(uchar, 1)
+QUANTIZE_IMPL(char, 1)
+QUANTIZE_IMPL(uint, 1)
+QUANTIZE_IMPL(int, 1)
+QUANTIZE_IMPL(uchar, 4)
+QUANTIZE_IMPL(ushort, 4)
+QUANTIZE_IMPL(short, 4)
+QUANTIZE_IMPL(uchar, 16)
+QUANTIZE_IMPL(char, 16)
+QUANTIZE_IMPL(ushort, 16)
+QUANTIZE_IMPL(short, 16)
+QUANTIZE_IMPL(uint, 16)
+QUANTIZE_IMPL(int, 16)
+
+DEQUANTIZE_IMPL(uchar, 1)
+DEQUANTIZE_IMPL(char, 1)
+DEQUANTIZE_IMPL(uint, 1)
+DEQUANTIZE_IMPL(int, 1)
+DEQUANTIZE_IMPL(uchar, 4)
+DEQUANTIZE_IMPL(ushort, 4)
+DEQUANTIZE_IMPL(short, 4)
+DEQUANTIZE_IMPL(uchar, 16)
+DEQUANTIZE_IMPL(char, 16)
+DEQUANTIZE_IMPL(ushort, 16)
+DEQUANTIZE_IMPL(short, 16)
+DEQUANTIZE_IMPL(uint, 16)
+DEQUANTIZE_IMPL(int, 16)
+
+ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(1)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(2)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(4)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(8)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(16)
+ASYMM_MULT_IMPL(1)
ASYMM_MULT_IMPL(2)
ASYMM_MULT_IMPL(4)
ASYMM_MULT_IMPL(8)
ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(8)
ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(16)
+ASYMM_SELECT_USING_MASK_IMPL(1)
ASYMM_SELECT_USING_MASK_IMPL(2)
ASYMM_SELECT_USING_MASK_IMPL(4)
ASYMM_SELECT_USING_MASK_IMPL(8)
ASYMM_SELECT_USING_MASK_IMPL(16)
+ASYMM_MASK_IF_ZERO_IMPL(1)
ASYMM_MASK_IF_ZERO_IMPL(2)
ASYMM_MASK_IF_ZERO_IMPL(4)
ASYMM_MASK_IF_ZERO_IMPL(8)
ASYMM_MASK_IF_ZERO_IMPL(16)
+ASYMM_MASK_IF_NON_ZERO_IMPL(1)
ASYMM_MASK_IF_NON_ZERO_IMPL(2)
ASYMM_MASK_IF_NON_ZERO_IMPL(4)
ASYMM_MASK_IF_NON_ZERO_IMPL(8)
ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(8)
ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(16)
+ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(1)
ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(2)
ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(4)
ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(8)
ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(8)
ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(16)
+ASYMM_RESCALE_IMPL(1)
ASYMM_RESCALE_IMPL(2)
ASYMM_RESCALE_IMPL(4)
ASYMM_RESCALE_IMPL(8)
ASYMM_RESCALE_IMPL(16)
+MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(1)
+MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(2)
+MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(4)
+MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(8)
+MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(16)
+
#endif // ARM_COMPUTE_HELPERS_ASYMM_H
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(DATA_TYPE)
-/** Returns result of prelu function implemented as below:
- * f(input) = alpha * input for input < 0, f(input) = input for input >= 0.
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- * @note Can only take floating point data types.
- *
- * @param[in] input1_ptr Pointer to the source image. Supported Data
- * types : F16/F32
- * @param[in] input1_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input1_step_x input1_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input1_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input1_step_y input1_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input1_step_z input1_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[in] alpha_ptr Pointer to the source image. Supported Data
- * types : F16/F32
- * @param[in] alpha_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] alpha_step_x input2_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] alpha_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] alpha_step_y input2_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] alpha_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] alpha_step_z input2_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] alpha_offset_first_element_in_bytes The offset of the first element in the source
- * image
- *
- * @param[out] output_ptr Pointer to the destination image. Supported
- * data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void prelu(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(alpha),
- TENSOR3D_DECLARATION(output))
-{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D alpha = CONVERT_TO_TENSOR3D_STRUCT(alpha);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
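- // Element-wise PReLU: lanes with a negative input are multiplied by the matching alpha
- // lane, all other lanes pass through unchanged; VEC_SIZE results are stored to the output.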
- VSTORE(VEC_SIZE)
- (VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr) < 0
- ? VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr) *
- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)alpha.ptr)
- : VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr),
- 0, (__global DATA_TYPE *)output.ptr);
-}
-#endif // defined(DATA_TYPE)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-#define SUB(x, y) ((x) - (y))
-
-#if defined(OFF_IN) && defined(OFF_ALPHA) && defined(OFF_OUT) && defined(SCALE_IN) && \
- defined(SCALE_ALPHA) && defined(SCALE_OUT) && defined(VEC_SIZE)
-
-#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
-#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_UCHAR VEC_DATA_TYPE(uchar, VEC_SIZE)
-#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
-#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
-#define SELECT_TYPE VEC_INT
-
-/** Returns result of prelu function implemented as below:
- * f(input) = alpha * input for input < 0, f(input) = input for input >= 0.
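- *
- * For illustration only (the quantization parameters below are made up, not from any real
- * model): with SCALE_IN = 0.5, OFF_IN = 128, SCALE_ALPHA = 0.1, OFF_ALPHA = 128,
- * SCALE_OUT = 0.5 and OFF_OUT = 128, an input byte of 100 dequantizes to
- * (100 - 128) * 0.5 = -14 and an alpha byte of 130 to 0.2, so the PReLU result is -2.8,
- * which requantizes to round(-2.8 / 0.5 + 128) = 122.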
- *
- * @attention Data type can be passed using the -DDATA_TYPE_IN compile flag, e.g.
- * -DDATA_TYPE_IN=uchar
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- * @note Can only take uchar data types.
- *
- * @param[in] input1_ptr Pointer to the source image. Supported Data
- * types : QASYMM8
- * @param[in] input1_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input1_step_x input1_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input1_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input1_step_y input1_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input1_step_z input1_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[in] alpha_ptr Pointer to the source image. Supported Data
- * types : QASYMM8
- * @param[in] alpha_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] alpha_step_x input2_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] alpha_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] alpha_step_y input2_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] alpha_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] alpha_step_z input2_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] alpha_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported
- * data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void prelu_qasymm8(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(alpha),
- TENSOR3D_DECLARATION(output))
-{
- // Get pixels pointer
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D alpha = CONVERT_TO_TENSOR3D_STRUCT(alpha);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
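- // Dequantize both operands (subtract the zero-point, scale to float), apply PReLU in
- // float, then requantize with the output scale and offset before the saturating store.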
- VEC_INT in_vec = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)input.ptr), VEC_INT);
- VEC_INT alpha_vec = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)alpha.ptr), VEC_INT);
-
- in_vec = SUB(in_vec, (VEC_INT)((int)OFF_IN));
- alpha_vec = SUB(alpha_vec, (VEC_INT)((int)OFF_ALPHA));
-
- const VEC_FLOAT inf32 = CONVERT(in_vec, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN);
- const VEC_FLOAT alphaf32 = CONVERT(alpha_vec, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_ALPHA);
- const VEC_FLOAT outf32 =
- select(inf32, inf32 * alphaf32, CONVERT(inf32 < (VEC_FLOAT)0, SELECT_TYPE));
- const VEC_FLOAT qresf32 = outf32 / ((VEC_FLOAT)(float)SCALE_OUT) + ((VEC_FLOAT)((float)OFF_OUT));
- const VEC_UCHAR res = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_UCHAR);
-
- VSTORE(VEC_SIZE)
- (res, 0, (__global uchar *)output.ptr);
-}
-
-#endif // defined(OFF_IN) && defined(OFF_ALPHA) && defined(OFF_OUT) && defined(SCALE_IN) &&
- // defined(SCALE_ALPHA) && defined(SCALE_OUT) && defined(VEC_SIZE)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN)
-/** Perform space to depth rearrangement of tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size.
- * e.g. -DDEPTH_IN=16
- * @attention The value of the z-axis of input tensor depth should be given as a preprocessor
- * argument using -DZ_IN=size. e.g. -DZ_IN=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g.
- * -DBLOCK_SIZE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in
- * bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void space_to_depth_nchw(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, Z_IN);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
-
- int out_index[4] = {0};
- int in_index[4] = {0};
-
- in_index[0] = get_global_id(0); // W
- in_index[1] = get_global_id(1); // H
- in_index[2] = get_global_id(2) % Z_IN; // C
- in_index[3] = get_global_id(2) / Z_IN; // B
-
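- // Fold each BLOCK_SIZE x BLOCK_SIZE spatial tile into the channel dimension: the offset
- // inside the tile selects which DEPTH_IN-sized channel group the element is written to.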
- out_index[0] = in_index[0] / BLOCK_SIZE;
- out_index[1] = in_index[1] / BLOCK_SIZE;
- out_index[2] =
- in_index[2] + ((in_index[1] % BLOCK_SIZE) * BLOCK_SIZE + in_index[0] % BLOCK_SIZE) * DEPTH_IN;
- out_index[3] = in_index[3];
-
- *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0], out_index[1], out_index[2],
- out_index[3])) = *((__global DATA_TYPE *)in.ptr);
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN)
-
-#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN)
-/** Perform space to depth rearrangement of tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size.
- * e.g. -DDEPTH_IN=16
- * @attention The value of the z-axis of input tensor depth should be given as a preprocessor
- * argument using -DZ_IN=size. e.g. -DZ_IN=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g.
- * -DBLOCK_SIZE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in
- * bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void space_to_depth_nhwc(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, Z_IN);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
-
- int out_index[4] = {0};
- int in_index[4] = {0};
-
- in_index[0] = get_global_id(0); // C
- in_index[1] = get_global_id(1); // W
- in_index[2] = get_global_id(2) % Z_IN; // H
- in_index[3] = get_global_id(2) / Z_IN; // B
-
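- // Same tile-to-channel folding as the NCHW variant above, with channels in dimension 0
- // for the NHWC layout.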
- out_index[0] =
- in_index[0] + ((in_index[2] % BLOCK_SIZE) * BLOCK_SIZE + in_index[1] % BLOCK_SIZE) * DEPTH_IN;
- out_index[1] = in_index[1] / BLOCK_SIZE;
- out_index[2] = in_index[2] / BLOCK_SIZE;
- out_index[3] = in_index[3];
-
- *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0], out_index[1], out_index[2],
- out_index[3])) = *((__global DATA_TYPE *)in.ptr);
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
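- // Collapsing the window above DimZ is only safe when neither operand broadcasts in those
- // dimensions, i.e. both input shapes agree in every dimension from DimZ upwards.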
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-const TensorShape inferOutputShape(const TensorShape &input_shape, const uint32_t axis)
-{
- TensorShape out_shape{input_shape};
-
- out_shape.set(axis, 1);
-
- return out_shape;
-}
-} // namespace
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
- ArgOperation /*op*/)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::S32, DataType::F32, DataType::U8,
- DataType::QASYMM8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::S32);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->tensor_shape().num_dimensions() - 1) !=
- output->tensor_shape().num_dimensions(),
- "Input's rank is not same with output");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0,
- "Inputs are not broadcast compatible");
-
- const TensorShape output_shape = inferOutputShape(input->tensor_shape(), axis);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_shape.total_size() != output->tensor_shape().total_size(),
- "output shape's size does not match axis");
-
- const auto num_dimensions = input->tensor_shape().num_dimensions();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= num_dimensions, "axis must be less than (input's rank).");
- return Status{};
-}
-
-} // namespace
-
-CLArgOperationKernel::CLArgOperationKernel() : _input(nullptr), _output(nullptr), _axis() {}
-
-void CLArgOperationKernel::configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis,
- ArgOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op));
-
- _input = input;
- _output = output;
- _axis = axis;
-
- std::unique_ptr<ITensorInfo> output_info = output->info()->clone();
- output_info->set_tensor_shape(inferOutputShape(input->info()->tensor_shape(), axis));
-
- // Construct kernel and set op_code based on type of ArgOperation as specified by object op
- std::string kernel_name = "arg_op";
- int op_code = 0;
- if (op == ArgOperation::MAX)
- {
- op_code = 1;
- }
- else if (op == ArgOperation::MIN)
- {
- op_code = 2;
- }
- else
- throw std::runtime_error("Operation not supported, yet");
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output_info->dimension(2)));
- build_opts.emplace("-DOP_CODE=" + support::cpp11::to_string(op_code));
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output_info, Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output_info->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output_info->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-Status CLArgOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const uint32_t axis, ArgOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op));
-
- return Status{};
-}
-
-void CLArgOperationKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &shape_in = _input->info()->tensor_shape();
-
- unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters
-
- _kernel.setArg<cl_int>(idx++, _axis);
- _kernel.setArg<cl_int>(idx++, shape_in[_axis]);
-
- Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup input slice
- Window slice_in(slice_out);
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_in.set(3, Window::Dimension(0, 0, 0));
-
- // Copy output's shape in order to use for recovering at end of this method
- const TensorShape shape_out = _output->info()->tensor_shape();
- _output->info()->set_tensor_shape(inferOutputShape(shape_in, _axis));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-
- // Recover output's shape of output tensor
- _output->info()->set_tensor_shape(shape_out);
-}
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLCastKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-CLCastKernel::CLCastKernel() : _input(nullptr), _output(nullptr) {}
-
-void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataType input_subtype)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-
- _input = input;
- _output = output;
-
- constexpr unsigned int num_elems_processed_per_iteration = 16;
-
- // Set kernel build options
- CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.add_option("-DDATA_TYPE_OUT=" +
- get_cl_type_from_data_type(output->info()->data_type()));
- build_opts.add_option(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-
- // Create kernel
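- // Select the kernel variant based on whether the input or the output is an asymmetric
- // quantized type; the plain "cast" kernel handles all other type combinations.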
- if (is_data_type_quantized_asymmetric(input->info()->data_type()))
- {
- UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
- const float scale_in = qinfo.scale;
- const int offset_in = qinfo.offset;
- build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
- build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
-
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("cast_qasymm_in", build_opts.options()));
- }
- else if (is_data_type_quantized_asymmetric(output->info()->data_type()))
- {
- UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
- const float scale_in = qinfo.scale;
- const float offset_in = qinfo.offset;
-
- build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
- build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
-
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("cast_qasymm_out", build_opts.options()));
- }
- else
- {
- build_opts.add_option_if(input_subtype == SubDataType::BOOL, "-DBOOL_INPUT");
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("cast", build_opts.options()));
- }
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, input->info()->valid_region());
-
- ICLKernel::configure_internal(win);
-}
-
-void CLCastKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
- Window slice = collapsed.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, lws_hint());
- } while (collapsed.slide_window_slice_3D(slice));
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-// TODO Use this validation function
-#if 0
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const int32_t block_size)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size < 1,
- "Block size should be greater than or equal to 1.");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(0) != input->dimension(0) * block_size,
- "Output width should be equal to (Input width * block size)");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(1) != input->dimension(1) * block_size,
- "Output height should be equal to (Input height * block size)");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(2) % (block_size * block_size) != 0,
- "Input depth should be divisible by (block size * block size)");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- output->dimension(2) != input->dimension(2) / (block_size * block_size),
- "Output depth should be equal to (Input depth / (block size * block size))");
-
- return Status{};
-}
-#endif
-} // namespace
-
-CLDepthToSpaceKernel::CLDepthToSpaceKernel() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void CLDepthToSpaceKernel::configure(const ICLTensor *input, ICLTensor *output,
- const int32_t block_size)
-{
- // TODO Add validation of data_layout
- _input = input;
- _output = output;
-
- // Set kernel build options
- auto layout_out = output->info()->data_layout();
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DBLOCK_SIZE=" + support::cpp11::to_string(block_size));
- auto index_depth = get_data_layout_dimension_index(layout_out, DataLayoutDimension::CHANNEL);
- auto depth = output->info()->dimension(index_depth);
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(depth));
- build_opts.emplace("-DZ_OUT=" + support::cpp11::to_string(output->info()->tensor_shape().z()));
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(
- "depth_to_space_" + lower_string(string_from_data_layout(layout_out)), build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLDepthToSpaceKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup input slice
- Window slice_in(slice_out);
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_in.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-}
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/AccessWindowTranspose.h"
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "support/ToolchainSupport.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <tuple>
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-namespace arm_compute
-{
-class Coordinates;
-} // namespace arm_compute
-
-namespace
-{
-using ElementsProcessed = Steps;
-
-Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
- const ITensorInfo *output, const GEMMReshapeInfo &gemm_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::S8);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input0->num_dimensions() > 4,
- "The number of dimensions for the matrix A must be <= 4");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3,
- "The number of dimensions for the matrix B must be <= 3");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 2 &&
- gemm_info.reinterpret_input_as_3d(),
- "The input1 tensor cannot have more than 2 dimensions if input0 "
- "has to be reinterpreted as 3D");
-
- const int m = gemm_info.m();
- const int n = gemm_info.n();
- const int k = gemm_info.k();
-
- ARM_COMPUTE_UNUSED(m);
- ARM_COMPUTE_UNUSED(n);
- ARM_COMPUTE_UNUSED(k);
-
- ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != static_cast<unsigned int>(k));
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != static_cast<unsigned int>(n));
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(1) != static_cast<unsigned int>(k));
- if (gemm_info.reinterpret_input_as_3d())
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) * input0->dimension(2) !=
- static_cast<unsigned int>(m));
- }
- else
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != static_cast<unsigned int>(m));
- }
-
- if (output->total_size() != 0)
- {
- const TensorInfo tensor_info_output =
- output->clone()->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32);
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1,
- ITensorInfo *output,
- const GEMMReshapeInfo &gemm_info,
- ElementsProcessed &num_elements_processed)
-{
- unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
- unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0);
-
- Window win{};
- Window win_out{};
- bool window_changed = false;
-
- // In case both input and output have to be reinterpreted as 3D tensors,
- // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
- if (reinterpret_input_as_3d == reinterpret_output_as_3d)
- {
- reinterpret_input_as_3d = false;
- reinterpret_output_as_3d = false;
- }
-
- // Output tensor auto-initialization if not yet initialized
- auto_init_if_empty(*output,
- input0->clone()
- ->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info))
- .set_data_type(DataType::S32));
-
- TensorInfo tmp_info(*output);
-
- if (reinterpret_output_as_3d)
- {
- // Since the output tensor has to be reinterpreted as 3D and the execute window is based on a 2D
- // GEMM,
- // the window needs to be constructed on the 2D collapsed version of the tensor
- TensorShape tmp_shape(output->tensor_shape());
- tmp_shape.collapse(2U, 1U);
- tmp_info.set_tensor_shape(tmp_shape);
- }
-
- // Special case for 1xN, 2xN, 3xN and 4xN input0 tensors when choosing
- // num_elems_processed_per_iteration_x
- // Note: if the dot product instruction is available, the 8x2 tile has to be used
- num_elems_processed_per_iteration_x = 4;
- num_elems_processed_per_iteration_y = std::min(static_cast<int>(output->dimension(1)), 4);
-
- // Note: bottom paddings are calculated manually as the output can be reinterpreted as 3D tensor
- // The only way to set properly the paddings, it is to set those explicitly through the
- // AccessWindowStatic
- const int m = reinterpret_input_as_3d ? input0->tensor_shape()[1] * input0->tensor_shape()[2]
- : input0->tensor_shape()[1];
- const int bottom_pad =
- (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) %
- num_elems_processed_per_iteration_y;
-
- // Configure window
- win = calculate_max_window(
- tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
- win_out = calculate_max_window(
- *output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
-
- AccessWindowStatic input0_access(input0, 0, 0, input0->dimension(0),
- input0->dimension(1) + bottom_pad);
- AccessWindowStatic input1_access(
- input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x),
- input1->dimension(1));
- AccessWindowStatic output_access(
- output, 0, 0, ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration_x),
- output->dimension(1) + bottom_pad);
-
- window_changed =
- update_window_and_padding(win, input0_access,
- input1_access) || // window used by the execute_window_loop
- update_window_and_padding(
- win_out,
- output_access); // window used to update the padding requirements of output tensor
-
- Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
- output_access.set_valid_region(win_out, ValidRegion(coord, output->tensor_shape()));
-
- // Collapse along the Z direction
- // This collapse needs to be here in order to tune the Z dimension of LWS
- Window collapsed = win;
- const unsigned int dimension_to_collapse =
- std::min(static_cast<unsigned int>(output->num_dimensions()), 2u);
- collapsed = win.collapse(win, dimension_to_collapse);
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, collapsed);
-}
-} // namespace
-
-CLGEMMLowpMatrixMultiplyKernelEx::CLGEMMLowpMatrixMultiplyKernelEx()
- : _input0(nullptr), _input1(nullptr), _output(nullptr), _slide_matrix_b(true),
- _reinterpret_input_as_3d(false), _reinterpret_output_as_3d(false)
-{
-}
-
-void CLGEMMLowpMatrixMultiplyKernelEx::configure(const ICLTensor *input0, const ICLTensor *input1,
- ICLTensor *output,
- const GEMMReshapeInfo &gemm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
-
- ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input0->info(), input1->info(), output->info(), gemm_info));
-
- _input0 = input0;
- _input1 = input1;
- _output = output;
- _reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- _reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0);
-
- // In case both input and output have to be reinterpreted as 3D tensors,
- // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
- if (_reinterpret_input_as_3d == _reinterpret_output_as_3d)
- {
- _reinterpret_input_as_3d = false;
- _reinterpret_output_as_3d = false;
- }
-
- // Check if we need to slide the matrix B
- const unsigned int num_dimensions_input0 = _reinterpret_input_as_3d
- ? _input0->info()->num_dimensions() - 1
- : _input0->info()->num_dimensions();
- _slide_matrix_b = (_input1->info()->num_dimensions() >= num_dimensions_input0);
-
- ElementsProcessed num_elements_processed{};
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(),
- gemm_info, num_elements_processed);
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure_internal(win_config.second);
-
- // Create build options
- std::string kernel_name(" ");
- CLBuildOptions build_opts;
- build_opts.add_option_if(_reinterpret_input_as_3d, "-DREINTERPRET_INPUT_AS_3D");
- build_opts.add_option_if(_reinterpret_output_as_3d, "-DREINTERPRET_OUTPUT_AS_3D");
- build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
- "-DHEIGHT_GEMM3D=" +
- support::cpp11::to_string(output->info()->dimension(1)));
- build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
- "-DDEPTH_GEMM3D=" +
- support::cpp11::to_string(output->info()->dimension(2)));
- build_opts.add_option_if(!_slide_matrix_b,
- "-DMATRIX_B_DEPTH=" +
- support::cpp11::to_string(input1->info()->dimension(2)));
- build_opts.add_option("-DCOLS_A=" + support::cpp11::to_string(input0->info()->dimension(0)));
- build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_X=" +
- support::cpp11::to_string(num_elements_processed.x()));
- build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_Y=" +
- support::cpp11::to_string(num_elements_processed.y()));
-
- kernel_name = "gemmlowp_mm_midgard_ex";
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += (_reinterpret_input_as_3d ? "3di_" : "");
- _config_id += (_reinterpret_output_as_3d ? "3do_" : "");
- _config_id += lower_string(string_from_data_type(input0->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
-}
-
-Status CLGEMMLowpMatrixMultiplyKernelEx::validate(const ITensorInfo *input0,
- const ITensorInfo *input1,
- const ITensorInfo *output,
- const GEMMReshapeInfo &gemm_info)
-{
- ElementsProcessed num_elements_processed{};
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, gemm_info));
- ARM_COMPUTE_RETURN_ON_ERROR(
- validate_and_configure_window(input0->clone().get(), input1->clone().get(),
- output->clone().get(), gemm_info, num_elements_processed)
- .first);
-
- return Status{};
-}
-
-void CLGEMMLowpMatrixMultiplyKernelEx::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- if (_input1->info()->num_dimensions() < 3)
- {
- // The stride_z for matrix B must be zero if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
- }
-
- Window slice = window.first_slice_window_3D();
- Window slice_matrix_b = slice;
-
- slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1));
- slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1));
-
- if (_reinterpret_input_as_3d)
- {
- // Pass bottom paddings to the kernel if the input has to be reinterpreted as 3D tensor
- const unsigned int idx0 = 3 * num_arguments_per_2D_tensor() + 3;
- const unsigned int total_cross_plane_pad =
- _input0->info()->padding().top + _input0->info()->padding().bottom;
- _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
- }
-
- if (_reinterpret_output_as_3d)
- {
- // Pass bottom paddings to the kernel if the output has to be reinterpreted as 3D tensor
- const unsigned int idx0 =
- 3 * num_arguments_per_2D_tensor() + 3 + (_reinterpret_input_as_3d ? 1 : 0);
- const unsigned int total_cross_plane_pad =
- _output->info()->padding().top + _output->info()->padding().bottom;
- _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
- }
-
- do
- {
- Window slice_b = slice;
- // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A
- // more than 2
- // This scenario can happen when the matrix multiplication is used to perform a convolution
- // operation
- if (!_slide_matrix_b)
- {
- slice_b = slice_matrix_b;
- }
-
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input0, slice);
- add_2D_tensor_argument(idx, _input1, slice_b);
- add_2D_tensor_argument(idx, _output, slice);
- _kernel.setArg<cl_uint>(idx++,
- static_cast<unsigned int>(_input0->info()->strides_in_bytes()[2]));
- _kernel.setArg<cl_uint>(idx++,
- static_cast<unsigned int>(_input1->info()->strides_in_bytes()[2]));
- _kernel.setArg<cl_uint>(idx++,
- static_cast<unsigned int>(_output->info()->strides_in_bytes()[2]));
- enqueue(queue, *this, slice, lws_hint());
- } while (window.slide_window_slice_3D(slice));
-}
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/core/UtilsEx.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
_hits = hits;
// Make _lookup_indices tensor
- _lookup_indices = arm_compute::support::cpp14::make_unique<CLTensor>();
+ _lookup_indices = support::cpp14::make_unique<CLTensor>();
_lookup_indices->allocator()->init(
TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32));
_lookup_indices->allocator()->allocate();
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
-
+#include "support/StringSupport.h"
#include "support/ToolchainSupport.h"
namespace arm_compute
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-Status validate_info(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
-{
- const TensorShape &out_shape =
- TensorShape::broadcast_shape(input->tensor_shape(), alpha->tensor_shape());
-
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32,
- DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(alpha, 1, DataType::F16, DataType::F32,
- DataType::QASYMM8);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
- "Inputs are not broadcast compatible");
- // Validate in case of configured output
- if (output->total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32,
- DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
- }
- return Status{};
-}
-} // namespace
-
-CLPReLUKernel::CLPReLUKernel() : _input(nullptr), _alpha(nullptr), _output(nullptr) {}
-
-void CLPReLUKernel::configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, alpha);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_info(input->info(), alpha->info(), output->info()));
-
- _input = input;
- _alpha = alpha;
- _output = output;
-
- // Create kernel
- std::string kernel_name = "prelu";
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-
- if (is_data_type_quantized_asymmetric(input->info()->data_type()))
- {
- build_opts.emplace("-DOFF_IN=" + support::cpp11::to_string(
- input->info()->quantization_info().uniform().offset));
- build_opts.emplace("-DOFF_ALPHA=" + support::cpp11::to_string(
- alpha->info()->quantization_info().uniform().offset));
- build_opts.emplace("-DOFF_OUT=" + support::cpp11::to_string(
- output->info()->quantization_info().uniform().offset));
- build_opts.emplace("-DSCALE_IN=" + support::cpp11::to_string(
- input->info()->quantization_info().uniform().scale));
- build_opts.emplace("-DSCALE_ALPHA=" + support::cpp11::to_string(
- alpha->info()->quantization_info().uniform().scale));
- build_opts.emplace("-DSCALE_OUT=" + support::cpp11::to_string(
- output->info()->quantization_info().uniform().scale));
- kernel_name += "_qasymm8";
- }
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input->info(), *alpha->info());
-
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- // Auto initialize output if not initialized
- {
- set_shape_if_empty(*output->info(), out_shape);
-
- if (input->info()->data_type() == DataType::F16 && alpha->info()->data_type() == DataType::F16)
- {
- set_format_if_unknown(*output->info(), Format::F16);
- }
- else if (input->info()->data_type() == DataType::F32 ||
- alpha->info()->data_type() == DataType::F32)
- {
- set_format_if_unknown(*output->info(), Format::F32);
- }
- }
-
- Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
- Window win_input1 = win.broadcast_if_dimension_le_one(*input->info());
- Window win_input2 = win.broadcast_if_dimension_le_one(*alpha->info());
-
- AccessWindowHorizontal input1_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(alpha->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win_input1, input1_access) ||
- update_window_and_padding(win_input2, input2_access) ||
- update_window_and_padding(win, output_access);
-
- output_access.set_valid_region(win, valid_region);
-
- ICLKernel::configure_internal(win);
-}
-
-void CLPReLUKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &in_shape1 = _input->info()->tensor_shape();
- const TensorShape &in_shape2 = _alpha->info()->tensor_shape();
- const TensorShape &out_shape = _output->info()->tensor_shape();
-
- bool can_collapse = true;
- if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
- {
- can_collapse =
- (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
- for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++)
- {
- can_collapse = (in_shape1[d] == in_shape2[d]);
- }
- }
-
- bool has_collapsed = false;
- Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
-
- const TensorShape &in_shape1_collapsed =
- has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
- const TensorShape &in_shape2_collapsed =
- has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
-
- Window slice = collapsed.first_slice_window_3D();
- Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
- Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice_input1);
- add_3D_tensor_argument(idx, _alpha, slice_input2);
- add_3D_tensor_argument(idx, _output, slice);
-
- enqueue(queue, *this, slice);
-
- collapsed.slide_window_slice_3D(slice_input1);
- collapsed.slide_window_slice_3D(slice_input2);
- } while (collapsed.slide_window_slice_3D(slice));
-}
-
-BorderSize CLPReLUKernel::border_size() const
-{
- const unsigned int replicateSize =
- _output->info()->dimension(0) -
- std::min(_input->info()->dimension(0), _alpha->info()->dimension(0));
- const unsigned int border =
- std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
- return BorderSize(0, border, 0, 0);
-}
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "support/StringSupport.h"
namespace arm_compute
{
// Output must always be initialized
ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
return Status{};
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
namespace
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "support/StringSupport.h"
#include <climits>
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const int32_t block_size)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size < 1,
- "Block size should be greater than or equal to 1.");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(3) != output->dimension(3),
- "Input batch should be equal to Output batch");
-
- auto layout_out = input->data_layout();
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
-
- auto index_depth = get_data_layout_dimension_index(layout_out, DataLayoutDimension::CHANNEL);
- auto index_height = get_data_layout_dimension_index(layout_out, DataLayoutDimension::HEIGHT);
- auto index_width = get_data_layout_dimension_index(layout_out, DataLayoutDimension::WIDTH);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- input->dimension(index_depth) * block_size * block_size != output->dimension(index_depth),
-      "Output depth should be equal to (input depth * block size * block size)");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->dimension(index_width) % block_size) ||
- (input->dimension(index_height) % block_size),
- "Input height and width should be divisible by block size");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- (output->dimension(index_width) != (input->dimension(index_width) / block_size)) ||
- (output->dimension(index_height) != (input->dimension(index_height) / block_size)),
- "Output height and width should be equal to "
- "input_height/blocksize and input_width/blocksize respectively");
-
- return Status{};
-}
-
-} // namespace
-
-CLSpaceToDepthKernel::CLSpaceToDepthKernel() : _input(nullptr), _output(nullptr) {}
-
-void CLSpaceToDepthKernel::configure(const ICLTensor *input, ICLTensor *output,
- const int32_t block_size)
-{
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_size));
-
- _input = input;
- _output = output;
-
- // Set kernel build options
- auto layout_out = input->info()->data_layout();
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DBLOCK_SIZE=" + support::cpp11::to_string(block_size));
- auto index_depth = get_data_layout_dimension_index(layout_out, DataLayoutDimension::CHANNEL);
- auto depth = input->info()->dimension(index_depth);
- build_opts.emplace("-DDEPTH_IN=" + support::cpp11::to_string(depth));
- build_opts.emplace("-DZ_IN=" + support::cpp11::to_string(input->info()->tensor_shape().z()));
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(
- "space_to_depth_" + lower_string(string_from_data_layout(layout_out)), build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLSpaceToDepthKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup output slice
- Window slice_out(slice_in);
- slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_out.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_in);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-using namespace arm_compute;
-
-CLTransposeConvLayerUpsampleKernel::CLTransposeConvLayerUpsampleKernel()
- : _input(nullptr), _output(nullptr), _inner_border(), _info()
-{
-}
-
-Status CLTransposeConvLayerUpsampleKernel::validate(const ITensorInfo *input,
- const ITensorInfo *output,
- const BorderSize &inner_border,
- const PadStrideInfo &info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
-
- const DataLayout data_layout = input->data_layout();
-
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
-
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_w) == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_h) == 0);
-
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_c) != output->dimension(idx_c));
- for (size_t i = 3; i < Coordinates::num_max_dimensions; ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
- }
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.right > info.stride().first - 1,
-                                  "inner_border_right must be smaller than stride_x");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.top > info.stride().second - 1,
-                                  "inner_border_top must be smaller than stride_y");
-
- return Status{};
-}
-
-void CLTransposeConvLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output,
- const BorderSize &inner_border,
- const PadStrideInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- _input = input;
- _output = output;
- _inner_border = inner_border;
- _info = info;
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayerUpsampleKernel::validate(
- input->info(), output->info(), inner_border, info));
-
- // Create kernel
- CLBuildOptions build_opts;
- build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options()));
-
- constexpr unsigned int num_elems_processed_per_iteration = 1;
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLTransposeConvLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const DataLayout data_layout = _input->info()->data_layout();
-
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-
- const int out_start_x = _info.pad_left();
- const int out_end_x = _output->info()->dimension(idx_w) - _inner_border.right -
- _info.pad_right() + _info.stride().first - 1;
- const int out_step_x = _info.stride().first;
-
- const int out_start_y = _inner_border.top + _info.pad_top();
- const int out_end_y =
- _output->info()->dimension(idx_h) - _info.pad_bottom() + _info.stride().second - 1;
- const int out_step_y = _info.stride().second;
-
- switch (data_layout)
- {
- case DataLayout::NCHW:
- {
- Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
-
- Window slice_out = collapsed.first_slice_window_3D();
- slice_out.set(Window::DimX, Window::Dimension(out_start_x, out_end_x, out_step_x));
- slice_out.set(Window::DimY, Window::Dimension(out_start_y, out_end_y, out_step_y));
-
- Window slice_in = collapsed.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice_in);
- add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (collapsed.slide_window_slice_3D(slice_in) &&
- collapsed.slide_window_slice_3D(slice_out));
- break;
- }
- case DataLayout::NHWC:
- {
- // NOTE: not collapsing in NHWC
- Window slice_out = window.first_slice_window_3D();
- slice_out.set(Window::DimY, Window::Dimension(out_start_x, out_end_x, out_step_x));
- slice_out.set(Window::DimZ, Window::Dimension(out_start_y, out_end_y, out_step_y));
-
- Window slice_in = window.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice_in);
- add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported data layout");
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
-#include <cstddef>
-#include <cstdint>
-
-namespace arm_compute
-{
-CPPUpsampleKernelEx::CPPUpsampleKernelEx() : _input(nullptr), _output(nullptr), _info() {}
-
-bool CPPUpsampleKernelEx::is_parallelisable() const { return false; }
-
-void CPPUpsampleKernelEx::configure(const ITensor *input, ITensor *output,
- const PadStrideInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- _input = input;
- _output = output;
- _info = info;
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
-
- // The CPPUpsampleKernelEx doesn't need padding so update_window_and_padding() can be skipped
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICPPKernel::configure(win);
-}
-
-void CPPUpsampleKernelEx::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
-
- // Initialize _scaled_output buffer
- const int width_scaled = _output->info()->dimension(0);
- const int height_scaled = _output->info()->dimension(1);
- const int stride_x = _info.stride().first;
- const int stride_y = _info.stride().second;
- const int start_x = _info.pad_left();
- const int start_y = _info.pad_top();
- const int end_y = height_scaled - _info.pad_bottom();
- const int end_x = width_scaled - _info.pad_top();
- const size_t element_size = _input->info()->element_size();
-
- // The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset
- const uint8_t fill_value =
- _output->info()->data_type() == DataType::QASYMM8
- ? utility::clamp<uint8_t>(_output->info()->quantization_info().uniform().offset)
- : 0;
-  // Filling a value different than 0 works only for QASYMM8 datatype since we are filling 1-byte
-  // values in a buffer of uint8_t
- std::fill_n(_output->buffer(), _output->info()->total_size(), fill_value);
-
- // Create window
- Window window_out(window);
- window_out.set(Window::DimX, Window::Dimension(start_x, end_x, stride_x));
- window_out.set(Window::DimY, Window::Dimension(start_y, end_y, stride_y));
-
- // Create iterators
- Iterator in(_input, window);
- Iterator out(_output, window_out);
-
- execute_window_loop(
- window, [&](const Coordinates &) { memcpy(out.ptr(), in.ptr(), element_size); }, in, out);
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NECastKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- SubDataType input_subtype)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8,
- DataType::QASYMM8, DataType::U32,
- DataType::S32, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON(input_subtype == SubDataType::BOOL &&
- input->data_type() != DataType::U8);
-
- if (output->tensor_shape().total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S8,
- DataType::QASYMM8, DataType::U32,
- DataType::S32, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- }
-
- return Status{};
-}
-
-std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
-{
- // Configure kernel window
- Window win = calculate_max_window(*input, Steps());
-
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32);
-
- // NECastKernel doesn't need padding so update_window_and_padding() can be skipped
- Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
- output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
-
- return std::make_tuple(Status{}, win);
-}
-
-typedef struct bool8x16
-{
- uint8x16_t val;
-} bool8x16_t;
-
-static inline uint8x16_t vreinterpretq_u8_b8(bool8x16_t __a) { return (uint8x16_t)__a.val; }
-
-template <typename ToV, typename FromV> inline ToV vcast(const FromV &v) { return v; }
-template <> inline uint8x16_t vcast(const bool8x16_t &v)
-{
- const uint8x16_t vu8 = vreinterpretq_u8_b8(v);
- const uint8x16_t zero_uint8x16 = vdupq_n_u8(0);
- uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16);
- return vshrq_n_u8(mask, 7); // true -> 1, false -> 0
-}
-
-template <> inline uint32x4x4_t vcast(const bool8x16_t &v)
-{
- const uint8x16_t vu8 = vreinterpretq_u8_b8(v);
- const uint8x16_t zero_uint8x16 = vdupq_n_u8(0);
- uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16);
- uint8x16_t vb = vshrq_n_u8(mask, 7); // true -> 1, false -> 0
-
- const uint32x4x4_t ret = {{
- vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(vb)))),
- vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(vb)))),
- vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(vb)))),
- vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(vb)))),
- }};
-
- return ret;
-}
-
-template <> inline int32x4x4_t vcast(const bool8x16_t &v)
-{
- const uint8x16_t vu8 = vreinterpretq_u8_b8(v);
- const uint8x16_t zero_uint8x16 = vdupq_n_u8(0);
- uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16);
- uint8x16_t vb = vshrq_n_u8(mask, 7); // true -> 1, false -> 0
-
- const int32x4x4_t ret = {{
- vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(vb))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(vb))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(vb))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(vb))))),
- }};
-
- return ret;
-}
-
-template <> inline float32x4x4_t vcast(const bool8x16_t &v)
-{
- const uint8x16_t vu8 = vreinterpretq_u8_b8(v);
- const uint8x16_t zero_uint8x16 = vdupq_n_u8(0);
- uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16);
- uint8x16_t vb = vshrq_n_u8(mask, 7); // true -> 1, false -> 0
-
- const float32x4x4_t ret = {{
- vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(vb))))),
- vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(vb))))),
- vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(vb))))),
- vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(vb))))),
- }};
-
- return ret;
-}
-
-template <> inline uint32x4x4_t vcast(const uint8x16_t &v)
-{
- const uint32x4x4_t ret = {{
- vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(v)))),
- vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(v)))),
- vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(v)))),
- vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(v)))),
- }};
-
- return ret;
-}
-
-template <> inline int32x4x4_t vcast(const uint8x16_t &v)
-{
- const int32x4x4_t ret = {{
- vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(v))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(v))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(v))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(v))))),
- }};
-
- return ret;
-}
-
-template <> inline float32x4x4_t vcast(const uint8x16_t &v)
-{
- const float32x4x4_t ret = {{
- vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(v))))),
- vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(v))))),
- vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(v))))),
- vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(v))))),
- }};
-
- return ret;
-}
-
-template <> inline uint8x16_t vcast(const int32x4x4_t &v)
-{
- // Saturate cast
- return vcombine_u8(vqmovn_u16(vcombine_u16(vqmovun_s32(v.val[0]), vqmovun_s32(v.val[1]))),
- vqmovn_u16(vcombine_u16(vqmovun_s32(v.val[2]), vqmovun_s32(v.val[3]))));
-}
-
-template <> inline uint32x4x4_t vcast(const int32x4x4_t &v)
-{
- // Saturate cast
- const uint32x4x4_t ret = {{
- vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[0]))),
- vqmovun_s64(vmovl_s32(vget_high_s32(v.val[0])))),
- vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[1]))),
- vqmovun_s64(vmovl_s32(vget_high_s32(v.val[1])))),
- vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[2]))),
- vqmovun_s64(vmovl_s32(vget_high_s32(v.val[2])))),
- vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[3]))),
- vqmovun_s64(vmovl_s32(vget_high_s32(v.val[3])))),
- }};
-
- return ret;
-}
-
-template <> inline float32x4x4_t vcast(const int32x4x4_t &v)
-{
- const float32x4x4_t ret = {{
- vcvtq_f32_s32(v.val[0]), vcvtq_f32_s32(v.val[1]), vcvtq_f32_s32(v.val[2]),
- vcvtq_f32_s32(v.val[3]),
- }};
-
- return ret;
-}
-
-template <> inline uint8x16_t vcast(const uint32x4x4_t &v)
-{
- return vcombine_u8(vqmovn_u16(vcombine_u16(vqmovn_u32(v.val[0]), vqmovn_u32(v.val[1]))),
- vqmovn_u16(vcombine_u16(vqmovn_u32(v.val[2]), vqmovn_u32(v.val[3]))));
-}
-
-template <> inline int32x4x4_t vcast(const uint32x4x4_t &v)
-{
- const int32x4x4_t ret = {{
- vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[0])))),
- vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[0]))))),
- vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[1])))),
- vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[1]))))),
- vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[2])))),
- vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[2]))))),
- vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[3])))),
- vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[3]))))),
- }};
-
- return ret;
-}
-
-template <> inline float32x4x4_t vcast(const uint32x4x4_t &v)
-{
- const float32x4x4_t ret = {{
- vcvtq_f32_u32(v.val[0]), vcvtq_f32_u32(v.val[1]), vcvtq_f32_u32(v.val[2]),
- vcvtq_f32_u32(v.val[3]),
- }};
-
- return ret;
-}
-
-template <> inline uint8x16_t vcast(const float32x4x4_t &v)
-{
- // Saturate cast
- return vcombine_u8(vqmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(v.val[0])),
- vqmovun_s32(vcvtq_s32_f32(v.val[1])))),
- vqmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(v.val[2])),
- vqmovun_s32(vcvtq_s32_f32(v.val[3])))));
-}
-
-template <> inline uint32x4x4_t vcast(const float32x4x4_t &v)
-{
- const uint32x4x4_t ret = {{
- vcvtq_u32_f32(v.val[0]), vcvtq_u32_f32(v.val[1]), vcvtq_u32_f32(v.val[2]),
- vcvtq_u32_f32(v.val[3]),
- }};
-
- return ret;
-}
-
-template <> inline int32x4x4_t vcast(const float32x4x4_t &v)
-{
- const int32x4x4_t ret = {{
- vcvtq_s32_f32(v.val[0]), vcvtq_s32_f32(v.val[1]), vcvtq_s32_f32(v.val[2]),
- vcvtq_s32_f32(v.val[3]),
- }};
-
- return ret;
-}
-
-template <typename T> struct cast_vector;
-template <> struct cast_vector<bool>
-{
- using type = bool8x16_t;
-};
-template <> struct cast_vector<uint8_t>
-{
- using type = uint8x16_t;
-};
-template <> struct cast_vector<uint32_t>
-{
- using type = uint32x4x4_t;
-};
-template <> struct cast_vector<int32_t>
-{
- using type = int32x4x4_t;
-};
-template <> struct cast_vector<float>
-{
- using type = float32x4x4_t;
-};
-
-template <typename T> inline void store_result(T *ptr, const typename cast_vector<T>::type &v)
-{
- wrapper::vstore(ptr, v.val[0]);
- wrapper::vstore(ptr + 4, v.val[1]);
- wrapper::vstore(ptr + 8, v.val[2]);
- wrapper::vstore(ptr + 12, v.val[3]);
-}
-
-template <> inline void store_result<uint8_t>(uint8_t *ptr, const uint8x16_t &v)
-{
- wrapper::vstore(ptr, v);
-}
-
-inline bool8x16_t vloadq(const bool *ptr)
-{
- bool8x16_t ret;
- ret.val = wrapper::vloadq(reinterpret_cast<const uint8_t *>(ptr));
- return ret;
-}
-
-template <typename T> inline typename cast_vector<T>::type load_input(const T *ptr)
-{
- return wrapper::vloadq(ptr);
-}
-
-template <> inline typename cast_vector<bool>::type load_input(const bool *ptr)
-{
- return vloadq(ptr);
-}
-
-template <> inline typename cast_vector<uint32_t>::type load_input(const uint32_t *ptr)
-{
- return vld4q_u32(ptr);
-}
-
-template <> inline typename cast_vector<int32_t>::type load_input(const int32_t *ptr)
-{
- return vld4q_s32(ptr);
-}
-
-template <> inline typename cast_vector<float>::type load_input(const float *ptr)
-{
- return vld4q_f32(ptr);
-}
-
-template <typename T> inline T get_value(const T *ptr) { return *ptr; }
-
-template <> inline bool get_value(const bool *ptr)
-{
- bool ret = (*ptr != 0);
- return ret;
-}
-
-template <typename FromT> void run_cast(const ITensor *input, ITensor *output, const Window &window)
-{
- const int window_step_x = 16;
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
-
- // Collapse window and reset first dimension to handle tail calculations manually
- Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
- win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- // Create iterators
- Iterator in(input, win_collapsed);
- Iterator out(output, win_collapsed);
-
-#ifdef __aarch64__
- constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
-#else //__aarch64__
- constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
-#endif //__aarch64__
-
- execute_window_loop(
- win_collapsed,
- [&](const Coordinates &) {
- const auto in_ptr = reinterpret_cast<const FromT *>(in.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- using from_vector = typename cast_vector<FromT>::type;
- const from_vector vin = load_input(in_ptr + x);
-
- switch (output->info()->data_type())
- {
- case DataType::U8:
- {
- using to_vector = typename cast_vector<uint8_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<uint8_t>(reinterpret_cast<uint8_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::QASYMM8:
- {
- using to_vector = typename cast_vector<float>::type;
- const UniformQuantizationInfo &qinfo_out =
- output->info()->quantization_info().uniform();
- const auto vf = vcast<to_vector, from_vector>(vin);
- const auto vout = vquantize(vf, qinfo_out);
- store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::U32:
- {
- using to_vector = typename cast_vector<uint32_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<uint32_t>(reinterpret_cast<uint32_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::S32:
- {
- using to_vector = typename cast_vector<int32_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<int32_t>(reinterpret_cast<int32_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::F32:
- {
- using to_vector = typename cast_vector<float>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<float>(reinterpret_cast<float *>(out.ptr()) + x, vout);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- FromT val = get_value(in_ptr + x);
- switch (output->info()->data_type())
- {
- case DataType::U8:
- {
- *(reinterpret_cast<uint8_t *>(out.ptr()) + x) = static_cast<uint8_t>(val);
- break;
- }
- case DataType::QASYMM8:
- {
- const QuantizationInfo &qinfo_out = output->info()->quantization_info();
- const auto qval =
- quantize_qasymm8(static_cast<float>(val), qinfo_out, rounding_policy);
- *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval;
- break;
- }
- case DataType::U32:
- {
- *(reinterpret_cast<uint32_t *>(out.ptr()) + x) = static_cast<uint32_t>(val);
- break;
- }
- case DataType::S32:
- {
- *(reinterpret_cast<int32_t *>(out.ptr()) + x) = static_cast<int32_t>(val);
- break;
- }
- case DataType::F32:
- {
- *(reinterpret_cast<float *>(out.ptr()) + x) = static_cast<float>(val);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
- }
- },
- in, out);
-}
-
-void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &window)
-{
- const int window_step_x = 16;
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
-
- // Collapse window and reset first dimension to handle tail calculations manually
- Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
- win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- // Create iterators
- Iterator in(input, win_collapsed);
- Iterator out(output, win_collapsed);
-
-#ifdef __aarch64__
- constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
-#else //__aarch64__
- constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
-#endif //__aarch64__
- const auto &qinfo_in = input->info()->quantization_info().uniform();
- const auto &qinfo_out = output->info()->quantization_info().uniform();
-
- execute_window_loop(
- win_collapsed,
- [&](const Coordinates &) {
- const auto in_ptr = reinterpret_cast<const qasymm8_t *>(in.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- using from_vector = typename cast_vector<float>::type;
- const auto vf = wrapper::vloadq(in_ptr + x);
- const auto vin = vdequantize(vf, qinfo_in);
- switch (output->info()->data_type())
- {
- case DataType::U8:
- {
- using to_vector = typename cast_vector<uint8_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<uint8_t>(reinterpret_cast<uint8_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::QASYMM8:
- {
- using to_vector = typename cast_vector<float>::type;
- const auto vf = vcast<to_vector, from_vector>(vin);
- const auto vout = vquantize(vf, qinfo_out);
- store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::U32:
- {
- using to_vector = typename cast_vector<uint32_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<uint32_t>(reinterpret_cast<uint32_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::S32:
- {
- using to_vector = typename cast_vector<int32_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<int32_t>(reinterpret_cast<int32_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::F32:
- {
- using to_vector = typename cast_vector<float>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<float>(reinterpret_cast<float *>(out.ptr()) + x, vout);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- qasymm8_t qval_in = *(in_ptr + x);
- const auto val = dequantize_qasymm8(qval_in, qinfo_in);
-
- switch (output->info()->data_type())
- {
- case DataType::U8:
- {
- *(reinterpret_cast<uint8_t *>(out.ptr()) + x) = static_cast<uint8_t>(val);
- break;
- }
- case DataType::QASYMM8:
- {
- const auto qval_out = quantize_qasymm8(val, qinfo_out, rounding_policy);
- *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval_out;
- break;
- }
- case DataType::U32:
- {
- *(reinterpret_cast<uint32_t *>(out.ptr()) + x) = static_cast<uint32_t>(val);
- break;
- }
- case DataType::S32:
- {
- *(reinterpret_cast<int32_t *>(out.ptr()) + x) = static_cast<int32_t>(val);
- break;
- }
- case DataType::F32:
- {
- *(reinterpret_cast<float *>(out.ptr()) + x) = static_cast<float>(val);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
- }
- },
- in, out);
-}
-} // namespace
-
-NECastKernel::NECastKernel() : _input(nullptr), _output(nullptr), _input_subtype(SubDataType::NONE)
-{
-}
-
-void NECastKernel::configure(const ITensor *input, ITensor *output, SubDataType input_subtype)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), input_subtype));
-
- _input = input;
- _output = output;
- _input_subtype = input_subtype;
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info());
-
- ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-
- INEKernel::configure(std::get<1>(win_config));
-}
-
-Status NECastKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- SubDataType input_subtype)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, input_subtype));
- ARM_COMPUTE_RETURN_ON_ERROR(
- std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
- return Status{};
-}
-
-void NECastKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
- switch (_input->info()->data_type())
- {
- case DataType::U8:
- if (_input_subtype == SubDataType::BOOL)
- {
- run_cast<bool>(_input, _output, window);
- }
- else
- {
- run_cast<uint8_t>(_input, _output, window);
- }
- break;
- case DataType::QASYMM8:
- run_cast_qasymm8(_input, _output, window);
- break;
- case DataType::U32:
- run_cast<uint32_t>(_input, _output, window);
- break;
- case DataType::S32:
- run_cast<int32_t>(_input, _output, window);
- break;
- case DataType::F32:
- run_cast<float>(_input, _output, window);
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
-#include <arm_neon.h>
-#include <cstdint>
-
-using namespace arm_compute::misc::shape_calculator;
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
- ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 2);
-
- const DataLayout data_layout = input->data_layout();
- const int idx_channel =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channel] % (block_shape * block_shape) !=
- 0);
- // Validate output if initialized
- if (output->total_size() != 0)
- {
- const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const int idx_height =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_width] !=
- (block_shape * input->tensor_shape()[idx_width]));
- ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_height] !=
- (block_shape * input->tensor_shape()[idx_height]));
- ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- }
-
- return Status{};
-}
-} // namespace
-
-NEDepthToSpaceLayerKernelEx::NEDepthToSpaceLayerKernelEx()
- : _input(nullptr), _output(nullptr), _block_shape()
-{
-}
-
-void NEDepthToSpaceLayerKernelEx::configure(const ITensor *input, ITensor *output,
- int32_t block_shape)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- TensorShape output_shape = compute_depth_to_space_shape_ex(input->info(), block_shape);
-  // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_shape));
-
- _input = input;
- _output = output;
- _block_shape = block_shape;
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
- ICPPKernel::configure(win);
-}
-
-Status NEDepthToSpaceLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- int32_t block_shape)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, block_shape));
- return Status{};
-}
-
-void NEDepthToSpaceLayerKernelEx::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
-
- const int idx_channel =
- get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::CHANNEL);
- const int depth_size = _input->info()->dimension(idx_channel);
- const int r = (depth_size / (_block_shape * _block_shape));
- const int element_size = _input->info()->element_size();
-
- Window slice_out = window.first_slice_window_3D();
-
- // The slice_out slice does not move
- slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
-
- // Main loop for NCHW and NHWC
- if (_input->info()->data_layout() == DataLayout::NCHW)
- {
- Window slice_in = window.first_slice_window_2D();
- do
- {
- Iterator in(_input, slice_in);
- execute_window_loop(slice_in,
- [&](const Coordinates &id) {
- const int x = id.x();
- const int y = id.y();
-
- const int z = id.z() % r;
- const int out_x = x * _block_shape + (id.z() / r) % _block_shape;
- const int out_y = y * _block_shape + (id.z() / r) / _block_shape;
- Coordinates output_coords{out_x, out_y, z, id[3]};
- memcpy(_output->ptr_to_element(output_coords), in.ptr(), element_size);
- },
- in);
- } while (window.slide_window_slice_2D(slice_in));
- }
- else
- {
- Window slice_in = window.first_slice_window_3D();
- do
- {
- Iterator in(_input, slice_in);
- execute_window_loop(slice_in,
- [&](const Coordinates &id) {
- const int x = id.y();
- const int y = id.z();
-
- const int z = id.x() % r;
- const int out_x = x * _block_shape + (id.x() / r) % _block_shape;
- const int out_y = y * _block_shape + (id.x() / r) / _block_shape;
- Coordinates output_coords{z, out_x, out_y, id[3]};
- memcpy(_output->ptr_to_element(output_coords), in.ptr(), element_size);
- },
- in);
- } while (window.slide_window_slice_3D(slice_in));
- }
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h"
-
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/NEFixedPoint.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-
-#include <algorithm>
-#include <arm_neon.h>
-#include <cstdint>
-#include <map>
-#include <string>
-
-namespace arm_compute
-{
-class Coordinates;
-
-namespace
-{
-template <ElementWiseUnaryEx op, typename ScalarType>
-inline ScalarType elementwise_op_scalar(const ScalarType &a)
-{
- switch (op)
- {
- case ElementWiseUnaryEx::NEG:
- return -a;
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
-}
-
-template <ElementWiseUnaryEx op, typename VectorType>
-inline VectorType elementwise_op(const VectorType &a)
-{
- switch (op)
- {
- case ElementWiseUnaryEx::NEG:
- return wrapper::vneg(a);
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
-}
-
-template <ElementWiseUnaryEx op, typename ScalarType>
-void elementwise_op(const ITensor *in, ITensor *out, const Window &window)
-{
- const int window_step_x = 16 / sizeof(ScalarType);
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
-
- Window win = window;
- win.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator input(in, win);
- Iterator output(out, win);
-
- execute_window_loop(win,
- [&](const Coordinates &) {
- auto output_ptr = reinterpret_cast<ScalarType *>(output.ptr());
- const auto input_ptr = reinterpret_cast<const ScalarType *>(input.ptr());
-
- int x = window_start_x;
- for (; x <= window_end_x - window_step_x; x += window_step_x)
- {
- wrapper::vstore(output_ptr + x,
- elementwise_op<op>(wrapper::vloadq(input_ptr + x)));
- }
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = elementwise_op_scalar<op>(*(input_ptr + x));
- }
- },
- input, output);
-}
-
-template <ElementWiseUnaryEx op>
-std::function<void(const ITensor *input, ITensor *output, const Window &window)>
-configure_func(const ITensor *input, ITensor *output)
-{
- std::string function_to_call("op_");
- function_to_call += string_from_data_type(input->info()->data_type()) + "_";
- function_to_call += string_from_data_type(output->info()->data_type());
-
- static std::map<std::string, NEElementwiseUnaryKernelEx::ElementwiseUnaryFunction *>
- map_function = {
- {"op_F32_F32", &elementwise_op<op, float>}, {"op_S32_S32", &elementwise_op<op, int32_t>},
- };
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- map_function["op_F16_F16"] = &elementwise_op<op, float16_t>;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
- auto it = map_function.find(function_to_call);
-
- if (it != map_function.end())
- {
- auto func = it->second;
- return [func](const ITensor *input, ITensor *output, const Window &window) {
- func(input, output, window);
- };
- }
- return nullptr;
-}
-} // namespace
-
-NEElementwiseUnaryKernelEx::NEElementwiseUnaryKernelEx()
- : _function(nullptr), _input(nullptr), _output(nullptr)
-{
-}
-
-void NEElementwiseUnaryKernelEx::configure(ElementWiseUnaryEx op, const ITensor *input,
- ITensor *output)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input->info(), *output->info()));
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- // Configure kernel window
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input->info());
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- // Auto initialize output if not initialized
- auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type());
-
- Window win = calculate_max_window(valid_region);
-
- _input = input;
- _output = output;
-
- INEKernel::configure(win);
-
- switch (op)
- {
- case ElementWiseUnaryEx::NEG:
- _function = configure_func<ElementWiseUnaryEx::NEG>(input, output);
- break;
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
-}
-
-Status NEElementwiseUnaryKernelEx::validate_arguments(const ITensorInfo &input,
- const ITensorInfo &output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32,
- DataType::S32);
-
- // Validate in case of configured output
- if (output.total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &output);
- }
-
- return Status{};
-}
-
-Status NEElementwiseUnaryKernelEx::validate(ElementWiseUnaryEx op, const ITensorInfo *input,
- const ITensorInfo *output)
-{
- ARM_COMPUTE_UNUSED(op);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input, *output));
- return Status{};
-}
-
-void NEElementwiseUnaryKernelEx::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(_function == nullptr);
- _function(_input, _output, window);
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NEPReLUKernel.h"
-
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/NEElementwiseOperationFuncs.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Window.h"
-
-#include <arm_neon.h>
-
-using namespace arm_compute;
-namespace
-{
-
-/** Conditional element-wise operations */
-enum class ConditionalOperation
-{
- PRELU, /**< (x * y) for x < 0, x for x >= 0 */
-};
-
-template <ConditionalOperation op, typename ScalarType>
-inline ScalarType elementwise_conditional_op_scalar(const ScalarType &a, const ScalarType &b)
-{
- auto res = ScalarType(0);
-
- switch (op)
- {
- case ConditionalOperation::PRELU:
- res = a < 0 ? a * b : a;
- break;
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
- return res;
-}
-
-template <ConditionalOperation op>
-inline uint8_t elementwise_conditional_op_quantized_scalar(const float &a, const float &b,
- QuantizationInfo qinfo)
-{
- return quantize_qasymm8(elementwise_conditional_op_scalar<op>(a, b), qinfo,
- RoundingPolicy::TO_NEAREST_UP);
-}
-
-template <ConditionalOperation op, typename VectorType>
-inline VectorType elementwise_conditional_op(const VectorType &a, const VectorType &b)
-{
- VectorType res = {0, 0, 0, 0};
- VectorType const_0 = {0, 0, 0, 0};
-
- switch (op)
- {
- case ConditionalOperation::PRELU:
- res = wrapper::vbsl(wrapper::vcgt(a, const_0), a, wrapper::vmul(a, b));
- ;
- break;
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
- return res;
-}
-
-template <ConditionalOperation op>
-inline float32x4x4_t elementwise_conditional_op(const float32x4x4_t &a, const float32x4x4_t &b)
-{
- float32x4x4_t out = {{
- elementwise_conditional_op<op>(a.val[0], b.val[0]),
- elementwise_conditional_op<op>(a.val[1], b.val[1]),
- elementwise_conditional_op<op>(a.val[2], b.val[2]),
- elementwise_conditional_op<op>(a.val[3], b.val[3]),
- }};
- return out;
-}
-
-template <ConditionalOperation op, typename ScalarType, typename VectorType>
-inline VectorType elementwise_conditional_op_broadcast(const VectorType &a,
- const ScalarType &broadcast_value,
- const bool reorder)
-{
- VectorType broadcast_vector = wrapper::vdup_n(broadcast_value, wrapper::traits::vector_128_tag());
- return elementwise_conditional_op<op>(reorder ? broadcast_vector : a,
- reorder ? a : broadcast_vector);
-}
-
-template <ConditionalOperation op, typename ScalarType, typename VectorType>
-inline int elementwise_conditional_op_loop(int window_start_x, int window_end_x, int window_step_x,
- const ScalarType *input1_ptr,
- const ScalarType *input2_ptr, ScalarType *output_ptr)
-{
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const auto a = wrapper::vloadq(input1_ptr + x);
- const auto b = wrapper::vloadq(input2_ptr + x);
- wrapper::vstore(output_ptr + x, elementwise_conditional_op<op>(a, b));
- }
- return x;
-}
-
-template <ConditionalOperation op>
-inline int elementwise_conditional_op_quantized_loop(int window_start_x, int window_end_x,
- int window_step_x, const uint8_t *input1_ptr,
- const uint8_t *input2_ptr, uint8_t *output_ptr,
- int32x4_t voffset1, int32x4_t voffset2,
- float32x4_t vscale1, float32x4_t vscale2,
- float32x4_t voffseto, float32x4_t invvscaleo)
-{
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- // Get inputs and compute output
- const float32x4x4_t af = load_quantized(input1_ptr + x, voffset1, vscale1);
- const float32x4x4_t bf = load_quantized(input2_ptr + x, voffset2, vscale2);
- const float32x4x4_t rf = elementwise_conditional_op<op>(af, bf);
- store_quantized(output_ptr + x, rf, voffseto, invvscaleo);
- }
- return x;
-}
-
-template <ConditionalOperation op, typename ScalarType, typename VectorType>
-inline int elementwise_conditional_op_broadcast_loop(int window_start_x, int window_end_x,
- int window_step_x,
- const ScalarType *non_broadcast_input_ptr,
- const ScalarType &broadcast_value,
- ScalarType *output_ptr, const bool reorder)
-{
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const auto a = wrapper::vloadq((non_broadcast_input_ptr + x));
- wrapper::vstore(output_ptr + x,
- elementwise_conditional_op_broadcast<op>(a, broadcast_value, reorder));
- }
- return x;
-}
-
-template <ConditionalOperation op>
-inline int elementwise_conditional_op_quantized_broadcast_loop(
- int window_start_x, int window_end_x, int window_step_x, const uint8_t *non_broadcast_input_ptr,
- float32x4x4_t broadcast_vector, uint8_t *output_ptr, int32x4_t voffset_non_broadcast,
- float32x4_t vscale_non_broadcast, float32x4_t voffseto, float32x4_t invvscaleo, bool reorder)
-{
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const float32x4x4_t af =
- load_quantized(non_broadcast_input_ptr + x, voffset_non_broadcast, vscale_non_broadcast);
- const float32x4x4_t rf = elementwise_conditional_op<op>(reorder ? broadcast_vector : af,
- reorder ? af : broadcast_vector);
- store_quantized(output_ptr + x, rf, voffseto, invvscaleo);
- }
- return x;
-}
-
-template <ConditionalOperation op, typename ScalarType, typename VectorType>
-void elementwise_conditional_op(const ITensor *in1, const ITensor *in2, ITensor *out,
- const Window &window)
-{
- elementwise_op(in1, in2, out, window, &elementwise_conditional_op_scalar<op, ScalarType>,
- &elementwise_conditional_op_broadcast_loop<op, ScalarType, VectorType>,
- &elementwise_conditional_op_loop<op, ScalarType, VectorType>);
-}
-
-template <ConditionalOperation op>
-void elementwise_conditional_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *out,
- const Window &window)
-{
- elementwise_op_quantized(in1, in2, out, window, &elementwise_conditional_op_quantized_scalar<op>,
- &elementwise_conditional_op_quantized_broadcast_loop<op>,
- &elementwise_conditional_op_quantized_loop<op>);
-}
-} // namespace
-
-NEPReLUKernel::NEPReLUKernel() : _input(nullptr), _alpha(nullptr), _output(nullptr) {}
-
-void NEPReLUKernel::configure(const ITensor *input, const ITensor *alpha, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, alpha, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input->info(), *alpha->info(), *output->info()));
-
- // Configure kernel window
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input->info(), *alpha->info());
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- // Auto initialize output if not initialized
- auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type());
-
- Window win = calculate_max_window(valid_region);
-
- _input = input;
- _alpha = alpha;
- _output = output;
- INEKernel::configure(win);
-}
-
-void NEPReLUKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- if (_input->info()->data_type() == DataType::F32)
- {
- elementwise_conditional_op<ConditionalOperation::PRELU, float, float32x4_t>(_input, _alpha,
- _output, window);
- }
- else if (_input->info()->data_type() == DataType::QASYMM8)
- {
- elementwise_conditional_op_quantized<ConditionalOperation::PRELU>(_input, _alpha, _output,
- window);
- }
- else
- {
- ARM_COMPUTE_ERROR("Wrong Type");
- }
-}
-
-Status NEPReLUKernel::validate_arguments(const ITensorInfo &input, const ITensorInfo &alpha,
- const ITensorInfo &output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &alpha, &output);
-
- const TensorShape out_shape =
- TensorShape::broadcast_shape(input.tensor_shape(), alpha.tensor_shape());
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
- "Inputs are not broadcast compatible");
-
- // Checks performed when output is configured
- if (output.total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
- "Wrong shape for output");
- }
-
- return Status{};
-}
-
-Status NEPReLUKernel::validate(const ITensorInfo *input, const ITensorInfo *alpha,
- const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, alpha, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input, *alpha, *output));
-
- return Status{};
-}
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scale_factor, 1, DataType::F16,
DataType::F32);
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
-#include <arm_neon.h>
-#include <cstdint>
-
-using namespace arm_compute::misc::shape_calculator;
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
-
- ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 1);
-
- // Validate output if initialized
- if (output->total_size() != 0)
- {
- const DataLayout data_layout = input->data_layout();
- const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const int idx_height =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const int idx_channel =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- const int idx_batch =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_width] % block_shape != 0);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_height] % block_shape != 0);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_batch] !=
- output->tensor_shape()[idx_batch]);
- ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_channel] % (block_shape * block_shape) !=
- 0);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().total_size() !=
- output->tensor_shape().total_size());
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- }
-
- return Status{};
-}
-} // namespace
-
-NESpaceToDepthLayerKernelEx::NESpaceToDepthLayerKernelEx()
- : _input(nullptr), _output(nullptr), _block_shape()
-{
-}
-
-void NESpaceToDepthLayerKernelEx::configure(const ITensor *input, ITensor *output,
- int32_t block_shape)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- TensorShape output_shape = compute_space_to_depth_shape_ex(input->info(), block_shape);
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_shape));
-
- _input = input;
- _block_shape = block_shape;
- _output = output;
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
- INEKernel::configure(win);
-}
-
-Status NESpaceToDepthLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- int32_t block_shape)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, block_shape));
- return Status{};
-}
-
-void NESpaceToDepthLayerKernelEx::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
-
- const DataLayout data_layout = _input->info()->data_layout();
- const int channel_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- const int element_size = _input->info()->element_size();
-
- const size_t channel_size = _input->info()->dimension(channel_idx);
-
- Window slice_out = window.first_slice_window_3D();
-
- int batch_id = 0;
-
- // Main loop for NCHW and NHWC
- if (_output->info()->data_layout() == DataLayout::NCHW)
- {
- do
- {
- Iterator out(_output, slice_out);
- execute_window_loop(slice_out,
- [&](const Coordinates &id) {
- const size_t channel_id = id.z();
- const size_t in_x =
- id.x() * _block_shape + (channel_id / channel_size) % _block_shape;
- const size_t in_y =
- id.y() * _block_shape + (channel_id / channel_size) / _block_shape;
- const int z = channel_id % channel_size;
- Coordinates input_coords{in_x, in_y, z, batch_id};
- memcpy(out.ptr(), _input->ptr_to_element(input_coords), element_size);
- },
- out);
- ++batch_id;
- } while (window.slide_window_slice_3D(slice_out));
- }
- else
- {
- do
- {
- Iterator out(_output, slice_out);
- execute_window_loop(slice_out,
- [&](const Coordinates &id) {
- const size_t channel_id = id.x();
- const size_t in_x =
- id.y() * _block_shape + (channel_id / channel_size) % _block_shape;
- const size_t in_y =
- id.z() * _block_shape + (channel_id / channel_size) / _block_shape;
- const int z = channel_id % channel_size;
- Coordinates input_coords{z, in_x, in_y, batch_id};
- memcpy(out.ptr(), _input->ptr_to_element(input_coords), element_size);
- },
- out);
- ++batch_id;
- } while (window.slide_window_slice_3D(slice_out));
- }
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLArgOperation.h"
-
-#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-namespace arm_compute
-{
-
-CLArgOperation::CLArgOperation()
-{
- // DO NOTHING
-}
-
-void CLArgOperation::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis,
- ArgOperation op)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), axis, output->info(), op));
- _input = input;
- _output = output;
- _axis = axis;
- _arg_op = op;
- // NOTE The argminmax_axis must have no duplication.
- _num_of_kernels = axis.size();
- const size_t num_of_interm_tensors = _num_of_kernels - 1;
-
- _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _argop_kernels =
- arm_compute::support::cpp14::make_unique<CLArgOperationKernel[]>(_num_of_kernels);
-
- TensorShape shape{input->info()->tensor_shape()};
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- shape.set(_axis[i], 1);
- _interm_tensors[i].allocator()->init(
- TensorInfo(shape, input->info()->num_channels(), input->info()->data_type())
- .set_data_layout(input->info()->data_layout()));
- _interm_tensors[i].allocator()->allocate();
- }
-
- // Set a vector that is ordered ICLTensors sequentially.
- std::vector<ICLTensor *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- tensors.emplace_back(_interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Apply ArgMinMax on all kernels
- for (size_t i = 0; i < _num_of_kernels; i++)
- {
- _argop_kernels[i].configure(tensors[i], tensors[i + 1], _axis[i], op);
- }
-}
-
-Status CLArgOperation::validate(const ITensorInfo *input, const std::vector<uint32_t> &axis,
- const ITensorInfo *output, ArgOperation op)
-{
- const size_t num_of_kernels = axis.size();
- const size_t num_of_interm_tensors = num_of_kernels - 1;
-
- // Create temporary tensor infos
- auto interm_tensors =
- arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
-
- // Create intermediate tensor info
- TensorShape shape{input->tensor_shape()};
-
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- shape.set(axis[i], 1);
- interm_tensors[i].set_data_type(input->data_type());
- interm_tensors[i].set_tensor_shape(shape);
- interm_tensors[i].set_num_channels(input->num_channels());
- }
-
- // Set a vector that is ordered ITensorInfo sequentially.
- std::vector<const ITensorInfo *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- tensors.emplace_back(interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Validate argminmax only on all kernels
- for (size_t i = 0; i < num_of_kernels; i++)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLArgOperationKernel::validate(tensors[i], tensors[i + 1], axis[i], op));
- }
-
- return Status{};
-}
-
-void CLArgOperation::run()
-{
- for (size_t i = 0; i < _num_of_kernels; ++i)
- {
- CLScheduler::get().enqueue(_argop_kernels[i]);
- }
-}
-
-} // namespace arm_compute
void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
BinaryLogicalOperation op)
{
- auto k = arm_compute::support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
+ auto k = support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
k->configure(input1, input2, output, op);
_kernel = std::move(k);
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLCast.h"
-
-#include "arm_compute/core/CL/kernels/CLCastKernel.h"
-
-using namespace arm_compute;
-
-void CLCast::configure(ICLTensor *input, ICLTensor *output, SubDataType input_subtype)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLCastKernel>();
- k->configure(input, output, input_subtype);
- _kernel = std::move(k);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h"
-
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
-
-using namespace arm_compute;
-
-void CLDepthToSpace::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceKernel>();
- k->configure(input, output, block_size);
- _kernel = std::move(k);
-}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/UtilsEx.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include <memory>
+#include <tuple>
+
+namespace arm_compute
+{
+using namespace arm_compute::misc::shape_calculator;
+
+CLDirectTransposeConvLayer::CLDirectTransposeConvLayer(
+ std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _scale_f(),
+ _conv_f(),
+ _flip_weights(),
+ _scaled_output(),
+ _original_weights(nullptr),
+ _weights_flipped(),
+ _flip_axis(),
+ _is_prepared(false)
+{
+}
+
+Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
+ input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
+ const DataLayout data_layout = input->data_layout();
+
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h));
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
+
+ auto out_dims = transposeconv_output_dimensions(
+ input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
+ weights->dimension(idx_h), info, invalid_right, invalid_bottom);
+
+ const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
+
+ if (bias != nullptr)
+ {
+ if (is_data_type_quantized_asymmetric(input->data_type()))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+ }
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias);
+ }
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w],
+ "Output's width is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h],
+ "Output's height is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c],
+ "Output's depth is invalid.");
+
+ unsigned int pad_left = 0;
+ unsigned int pad_right = 0;
+ unsigned int pad_top = 0;
+ unsigned int pad_bottom = 0;
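+ // Compute the upsampled (zero-inserted) input shape and the padding needed so a stride-1 convolution reproduces the requested output shape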
+ const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+ *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
+ pad_bottom);
+ TensorInfo scale_out_info(input->clone()
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(scale_out_shape)
+ .set_data_layout(data_layout));
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, info));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
+ conv_info, weights_info));
+
+ return Status{};
+}
+
+void CLDirectTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, info,
+ invalid_right, invalid_bottom, weights_info);
+}
+
+void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_context,
+ ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+ unsigned int pad_left = 0;
+ unsigned int pad_right = 0;
+ unsigned int pad_top = 0;
+ unsigned int pad_bottom = 0;
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
+
+ const DataLayout data_layout = input->info()->data_layout();
+
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ _original_weights = weights;
+ _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
+ _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
+ _flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis);
+
+ auto out_dims = transposeconv_output_dimensions(
+ input->info()->dimension(idx_w), input->info()->dimension(idx_h),
+ weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
+ invalid_bottom);
+
+ const TensorShape output_shape =
+ compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(
+ *output->info(),
+ input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
+
+ // Perform validation step
+ ARM_COMPUTE_ERROR_THROW_ON(CLDirectTransposeConvLayer::validate(
+ input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
+ info, invalid_right, invalid_bottom));
+
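+ // If the caller retains internal weights, the flipped weights are reused and prepare() skips flipping them again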
+ _is_prepared = weights_info.retain_internal_weights();
+
+ _memory_group.manage(&_scaled_output);
+
+ // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
+ // to match output shape
+ const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
+
+ TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
+ input->info()->quantization_info());
+ scale_out_info.set_data_layout(data_layout);
+ _scaled_output.allocator()->init(scale_out_info);
+
+ // configure scale function
+ const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+ DimensionRoundingType::FLOOR);
+ _scale_f.configure(input, &_scaled_output, upsample_info);
+
+ // Setup the function to convolve the upscaled output
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+ _conv_f.configure(compile_context, &_scaled_output, &_weights_flipped, bias, output, conv_info,
+ weights_info);
+ _scaled_output.allocator()->allocate();
+
+ // Setup flip axis data
+ _flip_axis.allocator()->allocate();
+ _flip_axis.map(true);
+ auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
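+ // Flip the weights along their spatial axes: width/height are dimensions 1 and 2 in NHWC, and 0 and 1 in NCHW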
+ if (weights->info()->data_layout() == DataLayout::NHWC)
+ {
+ axis_data[0] = 1;
+ axis_data[1] = 2;
+ }
+ else
+ {
+ axis_data[0] = 0;
+ axis_data[1] = 1;
+ }
+ _flip_axis.unmap();
+}
+
+void CLDirectTransposeConvLayer::run()
+{
+ prepare();
+
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ _scale_f.run();
+ _conv_f.run();
+}
+
+void CLDirectTransposeConvLayer::prepare()
+{
+ if (!_is_prepared)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+ // Run weights flipping and mark original weights tensor as unused
+ _weights_flipped.allocator()->allocate();
+ _flip_weights.run();
+ _original_weights->mark_as_unused();
+
+ // Prepare convolution
+ _conv_f.prepare();
+
+ // Free flipped weights
+ if (!_weights_flipped.is_used())
+ {
+ _weights_flipped.allocator()->free();
+ }
+
+ _is_prepared = true;
+ }
+}
+} // namespace arm_compute
void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output,
const ICLTensor *lookups)
{
- auto k = arm_compute::support::cpp14::make_unique<CLEmbeddingLookupKernel>();
+ auto k = support::cpp14::make_unique<CLEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
#include <algorithm>
ARM_COMPUTE_UNUSED(weights);
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output));
+ CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = support::cpp14::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_memory_group.manage(&_quantized_input);
_quant_input_kernel.configure(input, &_scale_factor, &_quantized_input);
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor));
// Validate quantization symm8 kernel
- const ITensorInfo &quantized_input = TensorInfo(
- input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ const ITensorInfo &quantized_input =
+ TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
ARM_COMPUTE_RETURN_ON_ERROR(
CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
#include <algorithm>
void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = support::cpp14::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
fc->configure(input_to_use, _weights, _biases, _output);
return std::unique_ptr<arm_compute::IFunction>(fc);
}
- else
+ else if (kernel_type == KernelType::PREPROCESSED_WEIGHTS)
{
- assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
-
bool is_hybrid = (input->info()->data_type() == DataType::F32 ||
input->info()->data_type() == DataType::F16) &&
- weights->info()->data_type() == DataType::S8;
+ (weights->info()->data_type() == DataType::S8 ||
+ weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
{
auto fc = new arm_compute::CLFullyConnectedHybridLayer{_memory_manager};
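+ // Temporarily re-tag the weights as QASYMM8_SIGNED so the hybrid layer accepts them, then restore the original data type after configuration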
+ ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info());
+ const auto origin_weights_data_type = weights_info->data_type();
+ weights_info->set_data_type(DataType::QASYMM8_SIGNED);
fc->configure(input_to_use, _weights, _biases, _output);
+ weights_info->set_data_type(origin_weights_data_type);
return std::unique_ptr<arm_compute::IFunction>(fc);
}
else
return std::unique_ptr<arm_compute::IFunction>(fc);
}
}
+ else
+ {
+ throw std::runtime_error("CLFullyConnectedReshapingLayer: Unsupported kernel type");
+ }
+
}();
if (_needs_reshape)
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-namespace arm_compute
-{
-using namespace arm_compute::misc::shape_calculator;
-using namespace arm_compute::cl_gemm;
-
-namespace
-{
-inline bool is_gemm_reshaped(bool reshape_b_only_on_first_run, GPUTarget gpu_target)
-{
- return (get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) && (reshape_b_only_on_first_run);
-}
-} // namespace
-
-CLGEMMLowpMatrixMultiplyCoreEx::CLGEMMLowpMatrixMultiplyCoreEx(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _mm_midgard_kernel(), _mtx_a_reduction_kernel(),
- _mtx_b_reduction_kernel(), _vector_sum_col(), _vector_sum_row(), _a_offset(0), _b_offset(0),
- _reshape_b_only_on_first_run(false), _is_prepared(false)
-{
-}
-
-void CLGEMMLowpMatrixMultiplyCoreEx::configure(const ICLTensor *a, const ICLTensor *b,
- const ICLTensor *c, ICLTensor *output,
- const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
- ARM_COMPUTE_UNUSED(c);
- ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCoreEx::validate(
- a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));
-
- _is_prepared = false;
- _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
- _a_offset = a->info()->quantization_info().uniform().offset;
- _b_offset = b->info()->quantization_info().uniform().offset;
-
- // Get the GPU target
- const GPUTarget gpu_target = CLScheduler::get().target();
-
- // Set the target for the kernels
- _mm_midgard_kernel.set_target(gpu_target);
-
- // GEMMRHSMatrixInfo rhs_info;
- // GEMMLHSMatrixInfo lhs_info;
-
- // Arguments used by GEMMReshapeInfo
- // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m,
- // n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo
- // in order to know how the matrices have been reshaped
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d
- ? (a->info()->dimension(1) * a->info()->dimension(2))
- : a->info()->dimension(1);
- const unsigned int n = b->info()->dimension(0);
- const unsigned int k = a->info()->dimension(0);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
-
- const ICLTensor *matrix_b = b;
- // Configure matrix multiply kernel
- _mm_midgard_kernel.configure(
- a, matrix_b, output,
- GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
-}
-
-Status CLGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b,
- const ITensorInfo *c, const ITensorInfo *output,
- const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
- ARM_COMPUTE_UNUSED(c);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(),
- "Matrix A already reshaped is not supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(),
- "Matrix B already reshaped is not supported");
-
- const ITensorInfo *matrix_a_info = a;
-
- // Get the GPU target
- const GPUTarget gpu_target = CLScheduler::get().target();
-
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m =
- reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
-
- bool reshape_matrix_b = is_gemm_reshaped(gemm_info.reshape_b_only_on_first_run(), gpu_target);
-
- const GEMMReshapeInfo reshape_info =
- GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);
-
- TensorInfo weights_info(*b);
- const ITensorInfo *matrix_b_info = &weights_info;
- if (reshape_matrix_b)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(false,
- "CLGEMMLowpMatrixMultiplyCoreEx does not support reshape_b");
- }
-
- // Validate matrix multiply
- ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyKernelEx::validate(
- matrix_a_info, matrix_b_info, output, reshape_info));
-
- return Status{};
-}
-
-void CLGEMMLowpMatrixMultiplyCoreEx::run()
-{
- prepare();
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Run matrix multiply
- CLScheduler::get().enqueue(_mm_midgard_kernel, false);
-}
-
-void CLGEMMLowpMatrixMultiplyCoreEx::prepare()
-{
- if (!_is_prepared)
- {
- _is_prepared = true;
- }
-}
-} // namespace arm_compute
void CLGatherEx::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output,
int axis)
{
- auto k = arm_compute::support::cpp14::make_unique<CLGatherExKernel>();
+ auto k = support::cpp14::make_unique<CLGatherExKernel>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys,
const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
{
- auto k = arm_compute::support::cpp14::make_unique<CLHashtableLookupKernel>();
+ auto k = support::cpp14::make_unique<CLHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
void CLInstanceNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
ICLTensor *gamma, ICLTensor *beta, float epsilon)
{
- auto k = arm_compute::support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>();
+ auto k = support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>();
k->configure(input, output, gamma, beta, epsilon);
_kernel = std::move(k);
}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLPReLU.h"
-
-#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-void CLPReLU::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLPReLUKernel>();
- k->configure(input, alpha, output);
- _kernel = std::move(k);
-
- if (output->info()->dimension(0) > 1)
- {
- ICLTensor *broadcasted_info = (input->info()->dimension(0) == 1) ? input : alpha;
-
- if (broadcasted_info->info()->dimension(0) == 1)
- {
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- }
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLRNNLayerEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-CLRNNLayerEx::CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(),
- _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(),
- _gemm_output(), _add_output(), _is_prepared(false)
-{
-}
-
-Status CLRNNLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
- const ITensorInfo *hidden_state, const ITensorInfo *output,
- const ActivationLayerInfo &info)
-{
- const int idx_width = 0;
- const int idx_height = 1;
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state,
- output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != weights->dimension(idx_width));
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_height) !=
- recurrent_weights->dimension(idx_width));
- ARM_COMPUTE_RETURN_ERROR_ON(recurrent_weights->dimension(idx_width) !=
- recurrent_weights->dimension(1));
- ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() != 1);
- ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(idx_width) != weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_width) != weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_height) != input->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(),
- hidden_state->tensor_shape());
-
- auto shape_info =
- TensorInfo(compute_rnn_shape(recurrent_weights, hidden_state->dimension(idx_height)), 1,
- input->data_type());
-
- ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, weights, bias, &shape_info));
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMM::validate(hidden_state, recurrent_weights, nullptr, &shape_info, 1.f, 0.f));
- ARM_COMPUTE_RETURN_ON_ERROR(CLSaturatedArithmeticOperationKernel::validate(
- ArithmeticOperation::ADD, &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE));
- ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&shape_info, &shape_info, info));
-
- return Status{};
-}
-
-void CLRNNLayerEx::configure(const ICLTensor *input, const ICLTensor *weights,
- const ICLTensor *recurrent_weights, const ICLTensor *bias,
- ICLTensor *hidden_state, ICLTensor *output, ActivationLayerInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
- ARM_COMPUTE_ERROR_THROW_ON(CLRNNLayerEx::validate(input->info(), weights->info(),
- recurrent_weights->info(), bias->info(),
- hidden_state->info(), output->info(), info));
-
- const int idx_height = 1;
- TensorShape shape =
- compute_rnn_shape(recurrent_weights->info(), hidden_state->info()->dimension(idx_height));
-
- _is_prepared = false;
-
- _fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
- _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
-
- // Manage intermediate buffers and configure
- _memory_group.manage(&_fully_connected_out);
- _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out);
-
- _memory_group.manage(&_gemm_output);
- _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f);
-
- _add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
- _memory_group.manage(&_add_output);
-
- _add_kernel.configure(ArithmeticOperation::ADD, &_fully_connected_out, &_gemm_output,
- &_add_output, ConvertPolicy::SATURATE);
-
- _fully_connected_out.allocator()->allocate();
- _gemm_output.allocator()->allocate();
-
- _activation_kernel.configure(&_add_output, hidden_state, info);
- _add_output.allocator()->allocate();
-
- _copy_kernel.configure(hidden_state, output);
-}
-
-void CLRNNLayerEx::run()
-{
- prepare();
-
- _memory_group.acquire();
-
- _fully_connected_kernel.run();
- _gemm_state_f.run();
- CLScheduler::get().enqueue(_add_kernel);
- CLScheduler::get().enqueue(_activation_kernel);
-
- // copy hidden out to output
- CLScheduler::get().enqueue(_copy_kernel);
-
- _memory_group.release();
-}
-
-void CLRNNLayerEx::prepare()
-{
- if (!_is_prepared)
- {
- _fully_connected_kernel.prepare();
- _gemm_state_f.prepare();
-
- _is_prepared = true;
- }
-}
const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);
// Create temporary tensor infos
- auto interm_tensors =
- arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
+ auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
// Create intermediate tensor info
TensorShape shape{input->tensor_shape()};
const size_t num_of_kernels = axis.size();
const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);
- _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _reduce_kernels =
- arm_compute::support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
+ _interm_tensors = support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
+ _reduce_kernels = support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
// Set a vector that is ordered ICLTensors sequentially.
std::vector<ICLTensor *> tensors;
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h"
-
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
-
-using namespace arm_compute;
-
-void CLSpaceToDepth::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLSpaceToDepthKernel>();
- k->configure(input, output, block_size);
- _kernel = std::move(k);
-}
*/
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h"
-#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
-#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/UtilsEx.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CPP/CPPScheduler.h"
+#include <cmath>
#include <memory>
#include <tuple>
using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;
-CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _scale_f(),
- _conv_f(),
- _flip_weights(),
- _scaled_output(),
- _original_weights(nullptr),
- _weights_flipped(),
- _is_prepared(false)
+CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_manager(std::move(memory_manager)), _function()
+{
+}
+
+void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias,
+ ICLTensor *output, const PadStrideInfo &deconv_info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
{
+ configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info,
+ invalid_right, invalid_bottom, weights_info);
+}
+
+void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, ICLTensor *input,
+ ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+ switch (CLTransposeConvLayer::get_deconvolution_method(input->info(), weights->info(), nullptr,
+ output->info(), deconv_info, invalid_right,
+ invalid_bottom, weights_info))
+ {
+ case DeconvolutionMethod::DIRECT:
+ {
+ auto f = arm_compute::support::cpp14::make_unique<CLDirectTransposeConvLayer>();
+ f->configure(compile_context, input, weights, bias, output, deconv_info, invalid_right,
+ invalid_bottom, weights_info);
+ _function = std::move(f);
+ break;
+ }
+ case DeconvolutionMethod::GEMM:
+ {
+ auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
+ f->configure(compile_context, input, weights, bias, output, deconv_info);
+ _function = std::move(f);
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Not supported.");
+ break;
+ }
}
Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
const ITensorInfo *bias, ITensorInfo *output,
- const PadStrideInfo &info, unsigned int invalid_right,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
unsigned int invalid_bottom, const WeightsInfo &weights_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
-
- const DataLayout data_layout = input->data_layout();
-
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
-
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h));
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
-
- const unsigned int kernel_x = weights->dimension(idx_w);
- const unsigned int kernel_y = weights->dimension(idx_h);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_right > kernel_x - 1,
- "invalid_right must be smaller than kernel_x");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_bottom > kernel_y - 1,
- "inner_border_top must be smaller than kernel_y");
-
- // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were added.
- auto out_dims = transposeconv_output_dimensions(
- input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
- weights->dimension(idx_h), info, invalid_right, invalid_bottom);
-
- const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
-
- if (bias != nullptr)
+ switch (CLTransposeConvLayer::get_deconvolution_method(
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info))
{
- if (is_data_type_quantized_asymmetric(input->data_type()))
+ case DeconvolutionMethod::DIRECT:
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+ // Validate direct convolution layer
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDirectTransposeConvLayer::validate(
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info));
+ break;
}
- else
+ case DeconvolutionMethod::GEMM:
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+ // Validate gemm-based convolution layer
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
+ break;
}
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias);
+ default:
+ ARM_COMPUTE_ERROR("Not supported.");
+ break;
}
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w],
- "Output's width is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h],
- "Output's height is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c],
- "Output's depth is invalid.");
-
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
- TensorInfo scale_out_info(input->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(scale_out_shape)
- .set_data_layout(data_layout));
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLTransposeConvLayerUpsample::validate(input, &scale_out_info, BorderSize(0, 0), info));
- ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, weights_info));
-
return Status{};
}
-void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias,
- ICLTensor *output, const PadStrideInfo &info,
- unsigned int invalid_right, unsigned int invalid_bottom,
- const WeightsInfo &weights_info)
+DeconvolutionMethod CLTransposeConvLayer::get_deconvolution_method(
+ const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias,
+ ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-
- const unsigned int stride_x = info.stride().first;
- const unsigned int stride_y = info.stride().second;
+ ARM_COMPUTE_UNUSED(output, bias, weights_info);
- const DataLayout data_layout = input->info()->data_layout();
+ const DataLayout data_layout = input->data_layout();
const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- _original_weights = weights;
- _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
- _flip_weights.configure(weights, &_weights_flipped);
-
- // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were
- // added.
- auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(idx_w), input->info()->dimension(idx_h),
- weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
- invalid_bottom);
-
- const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(
- *output->info(),
- input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayer::validate(
- input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
-
- _is_prepared = weights_info.retain_internal_weights();
-
- _memory_group.manage(&_scaled_output);
-
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
- // to match output shape
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
-
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
- input->info()->quantization_info());
- scale_out_info.set_data_layout(data_layout);
- _scaled_output.allocator()->init(scale_out_info);
-
- // configure scale function
- const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
- DimensionRoundingType::FLOOR);
- _scale_f.configure(input, &_scaled_output, BorderSize(0, 0), upsample_info);
-
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, weights_info);
- _scaled_output.allocator()->allocate();
+ if (weights->dimension(idx_w) != deconv_info.stride().first ||
+ weights->dimension(idx_h) != deconv_info.stride().second || invalid_right != 0 ||
+ invalid_bottom != 0)
+ {
+ return DeconvolutionMethod::DIRECT;
+ }
+
+ return DeconvolutionMethod::GEMM;
}
void CLTransposeConvLayer::run()
{
prepare();
-
- _memory_group.acquire();
-
- _scale_f.run();
- _conv_f.run();
-
- _memory_group.release();
+ _function->run();
}
-void CLTransposeConvLayer::prepare()
-{
- if (!_is_prepared)
- {
- ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
- // Run weights flipping and mark original weights tensor as unused
- _weights_flipped.allocator()->allocate();
- _weights_flipped.map(true);
- _original_weights->map(CLScheduler::get().queue(), true);
- CPPScheduler::get().schedule(&_flip_weights, Window::DimZ);
- _weights_flipped.unmap();
- _original_weights->unmap(CLScheduler::get().queue());
- _original_weights->mark_as_unused();
-
- // Prepare convolution
- _conv_f.prepare();
-
- if (!_weights_flipped.is_used())
- {
- _weights_flipped.allocator()->free();
- }
-
- _is_prepared = true;
- }
-}
+void CLTransposeConvLayer::prepare() { _function->prepare(); }
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-#include <cmath>
-#include <memory>
-#include <tuple>
-
-using namespace arm_compute;
-
-CLTransposeConvLayerUpsample::CLTransposeConvLayerUpsample() // NOLINT
- : _upsample(),
- _output(nullptr)
-{
-}
-
-Status CLTransposeConvLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output,
- const BorderSize &inner_border,
- const PadStrideInfo &info)
-{
- return CLTransposeConvLayerUpsampleKernel::validate(input, output, inner_border, info);
-}
-
-void CLTransposeConvLayerUpsample::configure(ICLTensor *input, ICLTensor *output,
- const BorderSize &inner_border,
- const PadStrideInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- _output = output;
- _upsample.configure(input, _output, inner_border, info);
-}
-
-void CLTransposeConvLayerUpsample::run()
-{
- _output->map(CLScheduler::get().queue(), true);
- if (is_data_type_quantized_asymmetric(_output->info()->data_type()))
- {
- const uint8_t quantized_zero = _output->info()->quantization_info().uniform().offset;
- std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero);
- }
- else
- {
- memset(_output->buffer(), 0, _output->info()->total_size());
- }
- _output->unmap(CLScheduler::get().queue());
-
- CLScheduler::get().enqueue(_upsample, false);
-}
#include "arm_compute/runtime/CPP/functions/CPPOneHotEx.h"
#include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
void CPPOneHotEx::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
const ITensor *off_value, ITensor *output, const int axis)
{
- auto k = arm_compute::support::cpp14::make_unique<CPPOneHotKernelEx>();
+ auto k = support::cpp14::make_unique<CPPOneHotKernelEx>();
k->configure(indices, depth, on_value, off_value, output, axis);
_kernel = std::move(k);
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
-
-#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-void CPPUpsampleEx::configure(const ITensor *input, ITensor *output, const PadStrideInfo &info)
-{
- auto k = arm_compute::support::cpp14::make_unique<CPPUpsampleKernelEx>();
- k->configure(input, output, info);
- _kernel = std::move(k);
-}
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h"
#include "arm_compute/runtime/IRuntimeContext.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
void NEActivationLayerEx::configure(ITensor *input, ITensor *output,
ActivationLayerInfo activation_info)
{
- auto k = arm_compute::support::cpp14::make_unique<NEActivationLayerKernelEx>();
+ auto k = support::cpp14::make_unique<NEActivationLayerKernelEx>();
k->configure(input, output, activation_info);
_kernel = std::move(k);
}
#include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h>
#include "arm_compute/core/ITensor.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
#include <utility>
void NEBinaryLogicalOperationStatic<COP>::configure(ITensor *input1, ITensor *input2,
ITensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(COP, input1, input2, output);
_kernel = std::move(k);
}
void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output,
BinaryLogicalOperation op)
{
- auto k = arm_compute::support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(op, input1, input2, output);
_kernel = std::move(k);
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NECast.h"
-
-#include "arm_compute/core/NEON/kernels/NECastKernel.h"
-#include "support/ToolchainSupport.h"
-
-namespace arm_compute
-{
-void NECast::configure(const ITensor *input, ITensor *output, SubDataType input_subtype)
-{
- auto k = arm_compute::support::cpp14::make_unique<NECastKernel>();
- k->configure(input, output, input_subtype);
- _kernel = std::move(k);
-}
-
-Status NECast::validate(const ITensorInfo *input, const ITensorInfo *output,
- SubDataType input_subtype)
-{
- return NECastKernel::validate(input, output, input_subtype);
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-
-namespace arm_compute
-{
-void NEDepthToSpaceLayerEx::configure(const ITensor *input, ITensor *output, int32_t block_shape)
-{
- auto k = arm_compute::support::cpp14::make_unique<NEDepthToSpaceLayerKernelEx>();
- k->configure(input, output, block_shape);
- _kernel = std::move(k);
-}
-
-Status NEDepthToSpaceLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- int32_t block_shape)
-{
- return NEDepthToSpaceLayerKernelEx::validate(input, output, block_shape);
-}
-} // namespace arm_compute
#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h"
#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups)
{
- auto k = arm_compute::support::cpp14::make_unique<NEEmbeddingLookupKernel>();
+ auto k = support::cpp14::make_unique<NEEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- NEGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output));
+ NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
void NEFullyConnectedHybridLayerReshapeWeights::configure(const ITensor *input, ITensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<NETransposeKernel>();
+ auto k = support::cpp14::make_unique<NETransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_scale_factor.allocator()->init(
TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
_quant_input_kernel.configure(input, &_quantized_input, &_scale_factor);
ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 2);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
// Validate quantization kernel
- const ITensorInfo &quantized_input = TensorInfo(
- input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ const ITensorInfo &quantized_input =
+ TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32);
ARM_COMPUTE_RETURN_ON_ERROR(
NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));
assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
bool is_hybrid = input->info()->data_type() == DataType::F32 &&
- weights->info()->data_type() == DataType::S8;
+ (weights->info()->data_type() == DataType::S8 ||
+ weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
{
auto fc = new arm_compute::NEFullyConnectedHybridLayer{_memory_manager};
+ ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info());
+ const auto origin_weights_data_type = weights_info->data_type();
+ weights_info->set_data_type(DataType::QASYMM8_SIGNED);
fc->configure(input_to_use, _weights, _biases, _output);
+ weights_info->set_data_type(origin_weights_data_type);
return std::unique_ptr<arm_compute::IFunction>(fc);
}
else
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-NEGEMMLowpMatrixMultiplyCoreEx::NEGEMMLowpMatrixMultiplyCoreEx(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _asm_glue(memory_manager), _mm_kernel(nullptr),
- _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _mtx_a_reduction_kernel(),
- _mtx_b_reduction_kernel(), _offset_contribution_kernel(),
- _offset_contribution_output_stage_kernel(), _vector_sum_col(), _vector_sum_row(), _tmp_a(),
- _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0),
- _b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false),
- _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false),
- _fuse_output_stage(false), _flip_signedness(false)
-{
-}
-
-void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *b, const ITensor *c,
- ITensor *output, const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
- ARM_COMPUTE_UNUSED(c);
- ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpMatrixMultiplyCoreEx::validate(
- a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));
-
- const ITensor *matrix_a = a;
- const ITensor *matrix_b = b;
- GEMMInfo info = gemm_info;
-
- // Clear state
- _mtx_a_reshape_kernel = nullptr;
- _mtx_b_reshape_kernel = nullptr;
-
- // Set internal variables
- _a_offset = a->info()->quantization_info().uniform().offset;
- _b_offset = b->info()->quantization_info().uniform().offset;
- _run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
- _reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
- _is_prepared = false;
- _fused_assembly_path = false;
- _original_b = b;
-
- const ITensor *a_to_use = a;
-
- // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
- if (info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE)
- {
- _fuse_output_stage = true;
- _memory_group.manage(&_mm_result_s32);
- TensorInfo info_mm_result_s32(output->info()->tensor_shape(), 1, DataType::S32);
- _mm_result_s32.allocator()->init(info_mm_result_s32);
- }
-
-#ifdef __aarch64__
- switch (a->info()->data_type())
- {
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::U8:
- case DataType::S8:
- {
- if (a_to_use->info()->data_type() == DataType::QASYMM8 &&
- info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
- {
- _asm_glue.configure(a_to_use, b, c, output, gemm_info);
- _fused_assembly_path = _asm_glue.is_configured();
- }
- else
- {
- _asm_glue.configure(a_to_use, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output,
- gemm_info);
- }
- _assembly_path = _asm_glue.is_configured();
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Datatype not supported");
- break;
- }
- }
-#endif /* __aarch64__ */
- if (!(_assembly_path || _run_vector_matrix_multiplication))
- {
- matrix_a = &_tmp_a;
- matrix_b = &_tmp_b;
-
- // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width /
- // 4.0f) ]
- TensorInfo a_info(compute_interleaved_shape(*a_to_use->info()), 1,
- a_to_use->info()->data_type(), a_to_use->info()->quantization_info());
- // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width /
- // 16.0f) ]
- TensorInfo b_info(compute_transpose1xW_shape(*b->info()), 1, b->info()->data_type(),
- b->info()->quantization_info());
- _tmp_a.allocator()->init(a_info);
- _tmp_b.allocator()->init(b_info);
- _memory_group.manage(&_tmp_a);
- if (!_reshape_b_only_on_first_run)
- {
- _memory_group.manage(&_tmp_b);
- }
-
- // Configure interleave kernel
- {
- auto k = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>();
- k->configure(a_to_use, &_tmp_a);
- _mtx_a_reshape_kernel = std::move(k);
- }
-
- // Configure transpose kernel
- {
- auto k = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
- k->configure(b, &_tmp_b);
- _mtx_b_reshape_kernel = std::move(k);
- }
- }
-
- if (!_fused_assembly_path)
- {
- // Initialize matrix B reduction kernel only if _a_offset is not equal to 0
- if (_a_offset != 0)
- {
- TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);
-
- _vector_sum_col.allocator()->init(info_vector_sum_col);
- if (!_reshape_b_only_on_first_run)
- {
- _memory_group.manage(&_vector_sum_col);
- }
-
- // Configure Matrix B reduction kernel
- _mtx_b_reduction_kernel.configure(b, &_vector_sum_col, a_to_use->info()->dimension(0), false);
- }
-
- // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
- if (_b_offset != 0)
- {
- TensorInfo info_vector_sum_row(compute_reductionB_shape(*a_to_use->info()), 1, DataType::S32);
-
- _vector_sum_row.allocator()->init(info_vector_sum_row);
- _memory_group.manage(&_vector_sum_row);
-
- // Configure matrix A reduction kernel
- _mtx_a_reduction_kernel.configure(a_to_use, &_vector_sum_row, a_to_use->info()->dimension(0),
- false);
- }
-
- if (_fuse_output_stage)
- {
- // Configure matrix multiply kernel
- if (!_assembly_path)
- {
- auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();
- k->configure(matrix_a, matrix_b, &_mm_result_s32);
- _mm_kernel = std::move(k);
- }
-
- _offset_contribution_output_stage_kernel.configure(
- &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col,
- _b_offset == 0 ? nullptr : &_vector_sum_row, c,
- _flip_signedness ? &_signed_output : output, a->info()->dimension(0), _a_offset,
- _b_offset, info.gemmlowp_output_stage());
- }
- else
- {
- // Configure matrix multiply kernel
- if (!_assembly_path)
- {
- auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();
- k->configure(matrix_a, matrix_b, output);
- _mm_kernel = std::move(k);
- }
- // Configure offset contribution kernel
- _offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col,
- _b_offset == 0 ? nullptr : &_vector_sum_row,
- a_to_use->info()->dimension(0), _a_offset, _b_offset);
- }
- }
-
- // Allocate tensors
- if (!_assembly_path && !_run_vector_matrix_multiplication)
- {
- _tmp_a.allocator()->allocate();
- if (!_reshape_b_only_on_first_run)
- {
- _tmp_b.allocator()->allocate();
- }
- }
-
- if (!_fused_assembly_path)
- {
- if (_a_offset != 0 && !_reshape_b_only_on_first_run)
- {
- _vector_sum_col.allocator()->allocate();
- }
-
- if (_b_offset != 0)
- {
- _vector_sum_row.allocator()->allocate();
- }
- }
-
- if (_fuse_output_stage)
- {
- _mm_result_s32.allocator()->allocate();
- }
-}
-
-Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b,
- const ITensorInfo *c, const ITensorInfo *output,
- const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::S8);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- c != nullptr && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::NONE,
- "Bias addition not supported in NEGEMMLowpMatrixMultiplyCoreEx for output S32");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((a)->dimension(0) != (b)->dimension(1),
- "The product AB is defined only if the number of columns in A is "
- "equal to the number of rows in B");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(),
- "Matrix A already reshaped is not supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(),
- "Matrix B already reshaped is not supported");
-
- GEMMInfo info = gemm_info;
- const ITensorInfo *matrix_a_info = a;
- const ITensorInfo *matrix_b_info = b;
-
- const ITensorInfo *a_to_use = a;
-
- TensorInfo tmp_a_info{};
- TensorInfo tmp_b_info{};
- TensorInfo mm_result_s32_info{};
-
- int32_t a_offset = a->quantization_info().uniform().offset;
- int32_t b_offset = b->quantization_info().uniform().offset;
-
- bool fuse_output_stage = info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE;
- if (fuse_output_stage)
- {
- auto_init_if_empty(
- mm_result_s32_info,
- a->clone()->set_tensor_shape(output->tensor_shape()).set_data_type(DataType::S32));
- }
-
- // Check if we need to run the optimized assembly kernel
- bool run_optimised = false;
- bool run_optimised_requantized = false;
- if (a_to_use->data_type() == DataType::QASYMM8 &&
- info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
- {
- run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info));
- run_optimised_requantized = run_optimised;
- }
- else
- {
- run_optimised = bool(NEGEMMAssemblyDispatch::validate(
- a_to_use, b, c, fuse_output_stage ? &mm_result_s32_info : output, gemm_info));
- }
-
- if (run_optimised)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(b->dimension(0) != output->dimension(0));
- if (info.depth_output_gemm3d() != 0)
- {
- if (info.reinterpret_input_as_3d())
- {
- ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1));
- ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(2) != output->dimension(2));
- }
- else
- {
- ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1) * output->dimension(2));
- }
- }
- else
- {
- ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1));
- }
- }
- else
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.reinterpret_input_as_3d(),
- "NEGEMM cannot reinterpret the input tensor as 3D");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.depth_output_gemm3d() != 0,
- "NEGEMM cannot reinterpret the output tensor as 3D");
-
- const bool run_vector_matrix_multiplication = a->dimension(1) < 2;
- if (!run_vector_matrix_multiplication)
- {
- matrix_a_info = &tmp_a_info;
- matrix_b_info = &tmp_b_info;
-
- // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width /
- // 4.0f) ]
- TensorShape shape_tmp_a = a->tensor_shape();
- shape_tmp_a.set(0, a->dimension(0) * 4);
- shape_tmp_a.set(1, std::ceil(a->dimension(1) / 4.f));
-
- // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width
- // / 16.0f) ]
- TensorShape shape_tmp_b = b->tensor_shape();
- shape_tmp_b.set(0, b->dimension(1) * 16);
- shape_tmp_b.set(1, std::ceil(b->dimension(0) / 16.f));
-
- // Validate interleave kernel
- auto_init_if_empty(tmp_a_info, a_to_use->clone()->set_tensor_shape(shape_tmp_a));
- auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(shape_tmp_b));
-
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMInterleave4x4Kernel::validate(a_to_use, &tmp_a_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMTranspose1xWKernel::validate(b, &tmp_b_info));
- }
- }
-
- if (!run_optimised_requantized)
- {
- TensorInfo info_vector_sum_col{};
- TensorInfo info_vector_sum_row{};
-
- // Validate matrix B reduction kernel only if _a_offset is not equal to 0
- if (a_offset != 0)
- {
- info_vector_sum_col = TensorInfo(compute_reductionA_shape(*b), 1, DataType::S32);
-
- // Configure Matrix B reduction kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixBReductionKernel::validate(
- b, &info_vector_sum_col, a->dimension(0), false));
- }
-
- // Validate Matrix A reduction kernel only if _b_offset is not equal to 0
- if (b_offset != 0)
- {
- info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32);
-
- // Configure matrix A reduction kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(
- a_to_use, &info_vector_sum_row, a->dimension(0), false));
- }
-
- if (fuse_output_stage)
- {
- if (!run_optimised)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyKernel::validate(
- matrix_a_info, matrix_b_info, &mm_result_s32_info));
- }
-
- // Validate offset contribution kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionOutputStageKernel::validate(
- &mm_result_s32_info, a_offset == 0 ? nullptr : &info_vector_sum_col,
- b_offset == 0 ? nullptr : &info_vector_sum_row, c, output, a_offset, b_offset,
- info.gemmlowp_output_stage()));
- }
- else
- {
- if (!run_optimised)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(
- NEGEMMLowpMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, output));
- }
- // Validate offset contribution kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionKernel::validate(
- output, a_offset == 0 ? nullptr : &info_vector_sum_col,
- b_offset == 0 ? nullptr : &info_vector_sum_row, a_offset, b_offset));
- }
- }
- return Status{};
-}
-
-void NEGEMMLowpMatrixMultiplyCoreEx::run()
-{
- prepare();
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Reshape inputs
- if (_mtx_a_reshape_kernel)
- {
- NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY);
- }
- if (_mtx_b_reshape_kernel && !_reshape_b_only_on_first_run)
- {
- NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
- }
-
- // Run GEMM
- if (_asm_glue.is_configured())
- {
- _asm_glue.run();
- }
- else
- {
- NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY);
- }
-
- if (!_fused_assembly_path)
- {
- // Run matrix A reduction kernel only if _b_offset is not equal to 0
- if (_b_offset != 0)
- {
- NEScheduler::get().schedule(&_mtx_a_reduction_kernel, Window::DimX);
- }
-
- // Run matrix B reduction kernel only if _a_offset is not equal to 0
- if (_a_offset != 0 && !_reshape_b_only_on_first_run)
- {
- NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
- }
-
- if (_fuse_output_stage)
- {
- // Run offset contribution kernel
- NEScheduler::get().schedule(&_offset_contribution_output_stage_kernel, Window::DimY);
- }
- else
- {
- // Run offset contribution kernel
- NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY);
- }
- }
-}
-
-void NEGEMMLowpMatrixMultiplyCoreEx::prepare()
-{
- if (!_is_prepared)
- {
- // Run assembly reshape
- if (_asm_glue.is_configured() && _reshape_b_only_on_first_run)
- {
- ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
-
- _asm_glue.prepare();
- _original_b->mark_as_unused();
- }
- // Run non-assembly reshape
- else if (_mtx_b_reshape_kernel && _reshape_b_only_on_first_run)
- {
- ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
-
- // Run reshape kernel and mark original weights tensor as unused
- _tmp_b.allocator()->allocate();
- NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
- _original_b->mark_as_unused();
- }
-
- // Run matrix B reduction kernel only if _a_offset is not equal to 0
- if (_a_offset != 0 && _reshape_b_only_on_first_run)
- {
- _vector_sum_col.allocator()->allocate();
- NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
- }
-
- _is_prepared = true;
- }
-}
#include "arm_compute/runtime/NEON/functions/NEGatherEx.h"
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
#include <utility>
{
void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis)
{
- auto k = arm_compute::support::cpp14::make_unique<NEGatherKernelEx>();
+ auto k = support::cpp14::make_unique<NEGatherKernelEx>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
#include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h"
#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
void NEHashtableLookup::configure(const ITensor *lookups, const ITensor *keys, const ITensor *input,
ITensor *output, ITensor *hits)
{
- auto k = arm_compute::support::cpp14::make_unique<NEHashtableLookupKernel>();
+ auto k = support::cpp14::make_unique<NEHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEPReLU.h"
-
-#include "arm_compute/core/NEON/kernels/NEPReLUKernel.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void NEPReLU::configure(const ITensor *input, const ITensor *alpha, ITensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<NEPReLUKernel>();
- k->configure(input, alpha, output);
- _kernel = std::move(k);
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NERNNLayerEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-NERNNLayerEx::NERNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(),
- _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(),
- _gemm_output(), _add_output(), _is_prepared(false)
-{
-}
-
-Status NERNNLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
- const ITensorInfo *hidden_state, const ITensorInfo *output,
- const ActivationLayerInfo &info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state,
- output);
-
- const int idx_width = 0;
- const int idx_height = 1;
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != weights->dimension(idx_width));
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_height) !=
- recurrent_weights->dimension(idx_width));
- ARM_COMPUTE_RETURN_ERROR_ON(recurrent_weights->dimension(idx_width) !=
- recurrent_weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() != 1);
- ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(idx_width) != weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_width) != weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_height) != input->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(),
- hidden_state->tensor_shape());
-
- auto shape_info = TensorInfo(misc::shape_calculator::compute_rnn_shape(
- recurrent_weights, hidden_state->dimension(idx_height)),
- 1, input->data_type());
-
- ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, weights, bias, &shape_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(
- &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE));
- ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&shape_info, &shape_info, info));
-
- return Status{};
-}
-
-void NERNNLayerEx::configure(const ITensor *input, const ITensor *weights,
- const ITensor *recurrent_weights, const ITensor *bias,
- ITensor *hidden_state, ITensor *output, ActivationLayerInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
- ARM_COMPUTE_ERROR_THROW_ON(NERNNLayerEx::validate(input->info(), weights->info(),
- recurrent_weights->info(), bias->info(),
- hidden_state->info(), output->info(), info));
-
- const int idx_height = 1;
- TensorShape shape = misc::shape_calculator::compute_rnn_shape(
- recurrent_weights->info(), hidden_state->info()->dimension(idx_height));
-
- _is_prepared = false;
-
- // Manage intermediate buffers and configure
- _fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
- _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
-
- // Manage intermediate buffers and configure
- _memory_group.manage(&_fully_connected_out);
- _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out);
-
- _memory_group.manage(&_gemm_output);
- _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f);
-
- _add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
- _memory_group.manage(&_add_output);
-
- _add_kernel.configure(&_fully_connected_out, &_gemm_output, &_add_output,
- ConvertPolicy::SATURATE);
-
- _fully_connected_out.allocator()->allocate();
- _gemm_output.allocator()->allocate();
-
- _activation_kernel.configure(&_add_output, hidden_state, info);
- _add_output.allocator()->allocate();
-
- _copy_kernel.configure(hidden_state, output);
-}
-
-void NERNNLayerEx::run()
-{
- prepare();
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- _fully_connected_kernel.run();
-
- _gemm_state_f.run();
-
- NEScheduler::get().schedule(&_add_kernel, Window::DimY);
- NEScheduler::get().schedule(&_activation_kernel, Window::DimY);
-
- // copy hidden out to output
- NEScheduler::get().schedule(&_copy_kernel, Window::DimY);
-}
-
-void NERNNLayerEx::prepare()
-{
- if (!_is_prepared)
- {
- _fully_connected_kernel.prepare();
- _gemm_state_f.prepare();
-
- _is_prepared = true;
- }
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEReduceMeanEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-using namespace arm_compute;
-
-NEReduceMeanEx::NEReduceMeanEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
-{
-}
-
-Status NEReduceMeanEx::validate(const ITensorInfo *input, const Coordinates &reduction_axis,
- bool keep_dims, const ITensorInfo *output)
-{
- ARM_COMPUTE_UNUSED(keep_dims);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions());
-
- TensorShape out_shape = input->tensor_shape();
- const unsigned int reduction_ops = reduction_axis.num_dimensions();
- const int input_dims = input->num_dimensions();
- Coordinates axis_local = reduction_axis;
-
- // Convert negative axis
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- axis_local[i] = wrap_around(axis_local[i], input_dims);
- }
-
- std::sort(axis_local.begin(), axis_local.begin() + reduction_ops);
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(axis_local[i] > 3);
- ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local[i]) >
- input->num_dimensions() - 1);
- if (output->total_size() > 0 && keep_dims)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(axis_local[i]) != 1);
- }
- if (keep_dims)
- {
- out_shape.set(axis_local[i], 1);
- }
- else
- {
- out_shape.remove_dimension(axis_local[i] - i);
- }
- }
- const TensorInfo out_info = input->clone()->set_tensor_shape(out_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
-
- return Status{};
-}
-
-void NEReduceMeanEx::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
- ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
- _reduction_ops = reduction_axis.num_dimensions();
- _reduction_kernels =
- arm_compute::support::cpp14::make_unique<NEReductionOperation[]>(_reduction_ops);
- _reduced_outs =
- arm_compute::support::cpp14::make_unique<Tensor[]>(_reduction_ops - (keep_dims ? 1 : 0));
- _keep_dims = keep_dims;
-
- Coordinates axis_local = reduction_axis;
- const int input_dims = input->info()->num_dimensions();
- const unsigned int reduction_ops = reduction_axis.num_dimensions();
-
- // Convert negative axis
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- axis_local[i] = wrap_around(axis_local[i], input_dims);
- }
-
- // Perform reduction for every axis
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- TensorShape out_shape = i == 0 ? input->info()->tensor_shape()
- : (_reduced_outs.get() + i - 1)->info()->tensor_shape();
- out_shape.set(axis_local[i], 1);
- auto in = (i == 0) ? input : (_reduced_outs.get() + i - 1);
-
- if (i == _reduction_ops - 1 && keep_dims)
- {
- _reduction_kernels[i].configure(in, output, axis_local[i], ReductionOperation::MEAN_SUM);
- }
- else
- {
- _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(),
- input->info()->data_type(),
- input->info()->quantization_info())
- .set_data_layout(output->info()->data_layout()));
- _memory_group.manage(_reduced_outs.get() + i);
- _reduction_kernels[i].configure(in, _reduced_outs.get() + i, axis_local[i],
- ReductionOperation::MEAN_SUM);
- }
- }
-
- // Allocate intermediate tensors
- for (unsigned int i = 0; i < _reduction_ops - (keep_dims ? 1 : 0); ++i)
- {
- _reduced_outs[i].allocator()->allocate();
- }
-
- // Configure reshape layer if we want to drop the dimensions
- if (!keep_dims)
- {
- TensorShape out_shape = input->info()->tensor_shape();
-
- // We have to sort the reduction axis vectors in order for remove_dimension
- // to work properly
- std::sort(axis_local.begin(), axis_local.begin() + _reduction_ops);
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- out_shape.remove_dimension(axis_local[i] - i);
- }
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape));
- _reshape.configure(_reduced_outs.get() + _reduction_ops - 1, output);
- }
-}
-
-void NEReduceMeanEx::run()
-{
- _memory_group.acquire();
-
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- _reduction_kernels[i].run();
- }
-
- if (!_keep_dims)
- {
- _reshape.run();
- }
- _memory_group.release();
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-NESpaceToBatchLayerEx::NESpaceToBatchLayerEx()
- : _space_to_batch_kernel(), _memset_kernel(), _has_padding(false)
-{
-}
-
-void NESpaceToBatchLayerEx::configure(const ITensor *input, const ITensor *block_shape,
- const ITensor *paddings, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_shape, paddings, output);
-
- if (input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
- {
- _has_padding = true;
- _memset_kernel.configure(
- output, PixelValue(0, output->info()->data_type(), output->info()->quantization_info()));
- }
- _space_to_batch_kernel.configure(input, block_shape, paddings, output);
-}
-
-void NESpaceToBatchLayerEx::configure(const ITensor *input, const int block_shape_x,
- const int block_shape_y, const Size2D &padding_left,
- const Size2D &padding_right, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- if (input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
- {
- _has_padding = true;
- _memset_kernel.configure(
- output, PixelValue(0, output->info()->data_type(), output->info()->quantization_info()));
- }
- _space_to_batch_kernel.configure(input, block_shape_x, block_shape_y, padding_left, padding_right,
- output);
-}
-
-Status NESpaceToBatchLayerEx::validate(const ITensorInfo *input, const ITensorInfo *block_shape,
- const ITensorInfo *paddings, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(
- NESpaceToBatchLayerKernel::validate(input, block_shape, paddings, output));
-
- return Status{};
-}
-
-Status NESpaceToBatchLayerEx::validate(const ITensorInfo *input, const int block_shape_x,
- const int block_shape_y, const Size2D &padding_left,
- const Size2D &padding_right, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(NESpaceToBatchLayerKernel::validate(
- input, block_shape_x, block_shape_y, padding_left, padding_right, output));
-
- return Status{};
-}
-
-void NESpaceToBatchLayerEx::run()
-{
- // Zero out output only if we have paddings
- if (_has_padding)
- {
- NEScheduler::get().schedule(&_memset_kernel, Window::DimY);
- }
- NEScheduler::get().schedule(&_space_to_batch_kernel, Window::DimY);
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-
-namespace arm_compute
-{
-void NESpaceToDepthLayerEx::configure(const ITensor *input, ITensor *output, int32_t block_shape)
-{
- auto k = arm_compute::support::cpp14::make_unique<NESpaceToDepthLayerKernelEx>();
- k->configure(input, output, block_shape);
- _kernel = std::move(k);
-}
-
-Status NESpaceToDepthLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- int32_t block_shape)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(NESpaceToDepthLayerKernelEx::validate(input, output, block_shape));
- return Status{};
-}
-} // namespace arm_compute
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
#include "arm_compute/core/UtilsEx.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
namespace arm_compute
{
+
NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
_conv_f(),
_upsample_f(),
_flip_weights(),
- _permute_input(),
- _permute_weights(),
- _permute_output(),
_scaled_output(),
_weights_flipped(),
- _permuted_input(),
- _permuted_weights(),
- _permuted_output(),
- _is_nchw(false),
+ _flip_axis(),
_original_weights(nullptr),
_input(nullptr),
_info(),
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16,
- DataType::QASYMM8);
+ DataType::QASYMM8, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
const unsigned int width_idx =
weights->dimension(height_idx), info, invalid_right, invalid_bottom);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
- if (is_data_type_quantized_asymmetric(input->data_type()) && bias)
+ if (bias != nullptr)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
- }
- else if (bias)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+ if (is_data_type_quantized_asymmetric(input->data_type()))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+ }
}
if (output->tensor_shape().total_size() > 0)
const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) < output_shape.x(),
- "Output's dim 0 is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) < output_shape.y(),
- "Output's dim 1 is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) < output_shape.z(),
- "Output's dim 2 is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(),
+ "Output's width is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(),
+ "Output's height is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(),
+ "Output's depth is invalid.");
}
unsigned int pad_left = 0;
pad_bottom);
TensorInfo scale_out_info(
input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
- scale_out_info.set_data_layout(input->data_layout());
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
const unsigned int batches_idx =
ITensor *output, const PadStrideInfo &info,
unsigned int invalid_right, unsigned int invalid_bottom)
{
+ // Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
+ input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
+ info, invalid_right, invalid_bottom));
const DataLayout data_layout = input->info()->data_layout();
-
- _input = input;
- _original_weights = weights;
- _info = info;
- _is_prepared = false;
- _is_nchw = data_layout == DataLayout::NCHW;
-
- const unsigned int stride_x = info.stride().first;
- const unsigned int stride_y = info.stride().second;
-
const unsigned int width_idx =
get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const unsigned int height_idx =
const TensorShape output_shape =
compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+
+ _input = input;
+ _original_weights = weights;
+ _info = info;
+ _is_prepared = false;
+
+ unsigned int pad_left = 0;
+ unsigned int pad_right = 0;
+ unsigned int pad_top = 0;
+ unsigned int pad_bottom = 0;
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
+
// Output auto initialization if not yet initialized
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(),
input->info()->quantization_info());
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
- input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
-
+ _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
_memory_group.manage(&_scaled_output);
- if (!_is_nchw)
- {
- _memory_group.manage(&_permuted_input);
- _memory_group.manage(&_permuted_weights);
- _memory_group.manage(&_permuted_output);
-
- // Configure the function to transform the input tensor from NHWC -> NCHW
- _permuted_input.info()->set_quantization_info(input->info()->quantization_info());
- _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
- _permuted_input.info()->set_data_layout(DataLayout::NCHW);
-
- // Configure the function to transform the weights tensor from NHWC -> NCHW
- _permuted_weights.info()->set_quantization_info(weights->info()->quantization_info());
- _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
- _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
-
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in
- // order to match output shape
-
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *_permuted_input.info(), *_permuted_weights.info(), info, out_dims, invalid_right,
- invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
-
- TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(),
- _permuted_input.info()->quantization_info());
- scale_out_info.set_data_layout(DataLayout::NCHW);
- _scaled_output.allocator()->init(scale_out_info);
-
- const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
- DimensionRoundingType::CEIL);
- _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info);
-
- _weights_flipped.allocator()->init(*_permuted_weights.info()->clone());
- _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info());
- _flip_weights.configure(&_permuted_weights, &_weights_flipped);
-
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-
- const auto out_shape = output->info()->tensor_shape();
- TensorShape permuted_out_shape{out_shape[1], out_shape[2], out_shape[0], out_shape[3]};
- TensorInfo permuted_out_info(permuted_out_shape, 1, output->info()->data_type(),
- output->info()->quantization_info());
- _permuted_output.allocator()->init(permuted_out_info);
- _permuted_output.info()->set_data_layout(DataLayout::NCHW);
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, &_permuted_output, conv_info);
-
- // Configure the function to transform the convoluted output to NHWC
- _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
-
- _permuted_input.allocator()->allocate();
- _permuted_weights.allocator()->allocate();
- _permuted_output.allocator()->allocate();
- }
- else
- {
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in
- // order to match output shape
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
-
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
- input->info()->quantization_info());
- _scaled_output.allocator()->init(scale_out_info);
- const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
- DimensionRoundingType::FLOOR);
- _upsample_f.configure(input, &_scaled_output, upsample_info);
-
- _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
- _flip_weights.configure(weights, &_weights_flipped);
-
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
- }
+ _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
+ _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
+
+ // setup the function to convolve the upscaled output
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+ const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
+
+ const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+ DimensionRoundingType::FLOOR);
+
+ TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
+ input->info()->quantization_info());
+ scale_out_info.set_data_layout(data_layout);
+ _scaled_output.allocator()->init(scale_out_info);
+
+ _upsample_f.configure(input, &_scaled_output, upsample_info);
+
+ _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
+
+ // Setup flip axis data
+ _flip_axis.allocator()->allocate();
+ auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
+ axis_data[0] = static_cast<uint32_t>(width_idx);
+ axis_data[1] = static_cast<uint32_t>(height_idx);
+
_scaled_output.allocator()->allocate();
}
{
prepare();
- // MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Permute input
- if (!_is_nchw)
- {
- _permute_input.run();
- }
+ MemoryGroupResourceScope scope_mg(_memory_group);
_upsample_f.run();
_conv_f.run();
-
- // Permute output
- if (!_is_nchw)
- {
- _permute_output.run();
- }
}
void NETransposeConvLayer::prepare()
// Run weights flipping and mark original weights tensor as unused
_weights_flipped.allocator()->allocate();
- // Permute weights
- if (!_is_nchw)
- {
- _permute_weights.run();
- }
- NEScheduler::get().schedule(&_flip_weights, Window::DimZ);
+ _flip_weights.run();
_original_weights->mark_as_unused();
// Prepare convolution
_conv_f.prepare();
- if (!_weights_flipped.is_used())
- {
- _weights_flipped.allocator()->free();
- }
-
_is_prepared = true;
}
}
target_link_libraries(nnfw_lib_cker INTERFACE ruy)
target_link_libraries(nnfw_lib_cker INTERFACE ruy_instrumentation)
target_compile_definitions(nnfw_lib_cker INTERFACE USE_RUY_GEMV)
+if(EXPERIMENTAL_RUY_FEATURE)
+ target_compile_definitions(nnfw_lib_cker INTERFACE EXPERIMENTAL_RUY_FEATURE)
+endif(EXPERIMENTAL_RUY_FEATURE)
if(PROFILE_RUY)
target_link_libraries(nnfw_lib_cker INTERFACE ruy_profiler)
endif(PROFILE_RUY)
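Since EXPERIMENTAL_RUY_FEATURE is exported as an INTERFACE compile definition, any target linking nnfw_lib_cker can gate code on it once the option is enabled at configure time (e.g. passing -DEXPERIMENTAL_RUY_FEATURE=ON to CMake, assuming that is how the option is surfaced). A minimal sketch; the two kernel functions are hypothetical and only the macro name comes from the change above:

  // Illustrative consumer code. Only EXPERIMENTAL_RUY_FEATURE is defined by the CMake
  // option above; RunRuyKernel/RunReferenceKernel are placeholder names.
  #ifdef EXPERIMENTAL_RUY_FEATURE
    RunRuyKernel(params);        // experimental ruy-based path
  #else
    RunReferenceKernel(params);  // default path
  #endif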
void NeonCpuBackendGemm(const int8_t *input, const int32_t *bias,
const int8_t *input_to_gate_weights, int32_t n_batch, int32_t n_input,
- int32_t n_output, int32_t, int32_t *scratch)
+ int32_t n_output, int32_t, int32_t *scratch, ruy::Context *ruy_context)
{
MatrixParams<int8_t> lhs_params;
lhs_params.order = Order::kRowMajor;
}
// Below code is from tflite::cpu_backend_gemm::detail::GemmImplUsingRuy
- ruy::Context *ruy_context = ruy_support::GetRuyContext();
-
ruy::Matrix<int8_t> ruy_lhs;
ruy::Matrix<int8_t> ruy_rhs;
ruy::Matrix<int32_t> ruy_dst;
const int m_cols, const int8_t *__restrict__ vectors,
const float *scaling_factors, int n_batch,
int32_t *scratch, float *__restrict__ result,
- int result_stride)
+ int result_stride, ruy::Context *ruy_context)
{
if (m_rows % 4 == 0 && result_stride == 1)
{
const int32_t *bias = static_cast<const int32_t *>(nullptr);
NeonCpuBackendGemm(vectors, bias, matrix, n_batch, m_cols, m_rows,
- /*output_zp =*/0, scratch);
+ /*output_zp =*/0, scratch, ruy_context);
// Multiply by float scaling factors and write to result
const int total_size = n_batch * m_rows;
#include "cker/Types.h"
#include "cker/neon/neon_check.h"
+#include <ruy/context.h>
#include <cstring>
#include <cmath>
const int8_t *__restrict__ vector,
const float *scaling_factors, int n_batch,
int32_t *, float *__restrict__ result,
- int result_stride)
+ int result_stride, ruy::Context *)
{
PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, scaling_factors,
n_batch, result, result_stride);
void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows, const int m_cols,
const int8_t *vectors, const float *scaling_factors,
int n_batch, int32_t *scratch, float *result,
- int result_stride)
+ int result_stride, ruy::Context *ruy_context)
{
NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vectors,
- scaling_factors, n_batch, scratch, result, result_stride);
+ scaling_factors, n_batch, scratch, result, result_stride, ruy_context);
}
void ZeroVector(float *vector, int v_size) { PortableZeroVector(vector, v_size); }
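With this change the hybrid path no longer fetches a global ruy context; the caller owns the context and passes it down through the new trailing parameter. A minimal sketch of a call site under that assumption (the buffers are assumed to have been quantized and allocated elsewhere; only the added ruy::Context* parameter comes from this diff):

  #include <ruy/context.h>

  ruy::Context ruy_context;  // typically owned per backend/thread (assumption)
  MatrixBatchVectorMultiplyAccumulate(filter_data, num_units, input_size, quantized_input,
                                      scaling_factors, batch_size, scratch, output_data,
                                      /*result_stride=*/1, &ruy_context);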
// FullyConnectedWeightsFormat weights_format;
};
+struct L2NormParams
+{
+ // uint8 inference params.
+ int32_t input_zero_point;
+};
+
struct GatherParams
{
int32_t axis;
float float_activation_max;
};
+struct ResizeBilinearParams
+{
+ int32_t output_height;
+ int32_t output_width;
+ bool align_corners;
+ bool half_pixel_centers;
+};
+
struct TransposeConvParams
{
PaddingType padding_type;
int16_t axis;
};
+struct SplitVParams
+{
+ uint16_t num_split;
+ int16_t axis;
+};
+
struct FusedBatchNormParams
{
bool is_training;
int32_t output_offset;
};
+struct SpaceToDepthParams
+{
+ int32_t block_size;
+};
+
enum class Order
{
kColMajor,
return leading_zeros;
}
+inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
+ int32_t *output_inv_sqrt, int *output_shift)
+{
+ assert(input >= 0);
+ if (input <= 1)
+ {
+ // Handle the input value 1 separately to avoid overflow in that case
+ // in the general computation below (b/143972021). Also handle 0 as if it
+ // were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid
+ // but rare/unrealistic input value. We can expect both to occur in some
+ // incompletely trained models, but probably not in fully trained models.
+ *output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
+ *output_shift = 0;
+ return;
+ }
+ assert(input > 1);
+ *output_shift = 11;
+ while (input >= (1 << 29))
+ {
+ input /= 4;
+ ++*output_shift;
+ }
+ const unsigned max_left_shift_bits = CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
+ const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
+ const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
+ *output_shift -= left_shift_bit_pairs;
+ input <<= 2 * left_shift_bit_pairs;
+ assert(input >= (1 << 27));
+ assert(input < (1 << 29));
+ using gemmlowp::FixedPoint;
+ using gemmlowp::Rescale;
+ using gemmlowp::SaturatingRoundingMultiplyByPOT;
+ // Using 3 integer bits gives us enough room for the internal arithmetic in
+ // this Newton-Raphson iteration.
+ using F3 = FixedPoint<int32_t, 3>;
+ using F0 = FixedPoint<int32_t, 0>;
+ const F3 fixedpoint_input = F3::FromRaw(input >> 1);
+ const F3 fixedpoint_half_input = SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
+ const F3 fixedpoint_half_three =
+ GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
+ // Newton-Raphson iteration
+ // Naive unoptimized starting guess: x = 1
+ F3 x = F3::One();
+ // Naive unoptimized number of iterations: 5
+ for (int i = 0; i < 5; i++)
+ {
+ const F3 x3 = Rescale<3>(x * x * x);
+ x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
+ }
+ const F0 fixedpoint_half_sqrt_2 =
+ GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
+ x = x * fixedpoint_half_sqrt_2;
+ *output_inv_sqrt = x.raw();
+ if (*output_shift < 0)
+ {
+ *output_inv_sqrt <<= -*output_shift;
+ *output_shift = 0;
+ }
+ // Convert right shift (right is positive) to left shift.
+ *output_shift *= reverse_shift;
+}
+
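This is the gemmlowp-style Newton-Raphson inverse square root used by quantized kernels; a quantized L2 normalization path, for example, would consume it roughly as below. A hedged sketch; the variable names are illustrative:

  // Illustrative only: turn 1/sqrt(square_l2_norm) into a fixed-point multiplier plus shift,
  // with reverse_shift = -1 so the returned shift is expressed as a left shift.
  int32_t inv_sqrt_multiplier = 0;
  int inv_sqrt_shift = 0;
  GetInvSqrtQuantizedMultiplierExp(square_l2_norm, /*reverse_shift=*/-1,
                                   &inv_sqrt_multiplier, &inv_sqrt_shift);
  // Each centered input value is then rescaled by (inv_sqrt_multiplier, inv_sqrt_shift)
  // using the usual saturating fixed-point multiply helpers.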
// Comment from tensorflow lite:
//
// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_BATCH_TO_SPACE_ND_H__
+#define __NNFW_CKER_BATCH_TO_SPACE_ND_H__
+
+#include "cker/Shape.h"
+
+#define UNUSED(x) ((void)(x))
+
+namespace nnfw
+{
+namespace cker
+{
+
+// Helper methods for BatchToSpaceND.
+// `spatial_index_dim` specifies post-crop offset index in this spatial
+// dimension, i.e. spatial offset introduced by flattening batch to spatial
+// dimension minus the crop size at the beginning. `block_shape_dim` is the block
+// size in the current dimension. `input_dim` and `output_dim` are the input and
+// output sizes of the BatchToSpaceND operation in the current dimension.

+// Output start index is inclusive and end index is exclusive.
+inline void GetIndexRange(int spatial_index_dim, int block_shape_dim, int input_dim, int output_dim,
+ int *start_index, int *end_index)
+{
+ // (*start_index) * block_shape_dim is effectively rounded up to the next
+ // multiple of block_shape_dim by the integer division.
+ *start_index = std::max(0, (-spatial_index_dim + block_shape_dim - 1) / block_shape_dim);
+ // Similarly, (*end_index) * block_shape_dim is rounded up too (note that
+ // end_index is exclusive).
+ *end_index =
+ std::min(input_dim, (output_dim - spatial_index_dim + block_shape_dim - 1) / block_shape_dim);
+}
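A worked example of the range computation above, with values chosen purely for illustration: block_shape_dim = 2, a crop of 1 at the start (so spatial_index_dim = 0 - 1 = -1), input_dim = 6 and output_dim = 8:

  // start_index = max(0, (1 + 2 - 1) / 2)     = 1
  // end_index   = min(6, (8 + 1 + 2 - 1) / 2) = min(6, 5) = 5
  // Input rows 1..4 therefore map to output rows 2*i - 1 = {1, 3, 5, 7}, all inside [0, 8),
  // while row 0 (which would land at -1) and row 5 (which would land at 9) are skipped.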
+
+template <typename T>
+inline void BatchToSpaceND(const Shape &unextended_input1_shape, const T *input1_data,
+ const int32_t *block_shape_data, const int32_t *crops_data,
+ const Shape &unextended_output_shape, T *output_data)
+{
+ auto input_dim = unextended_input1_shape.DimensionsCount();
+ auto output_dim = unextended_output_shape.DimensionsCount();
+
+ assert(input_dim == 3 || input_dim == 4);
+ assert(input_dim == output_dim);
+
+ UNUSED(input_dim);
+ UNUSED(output_dim);
+
+ // Extends the input/output shape from 3D to 4D if needed, NHC -> NH1C.
+ auto extend_shape = [](const Shape &shape) {
+ if (shape.DimensionsCount() == 4)
+ {
+ return shape;
+ }
+ Shape new_shape(4, 1);
+ new_shape.SetDim(0, shape.Dims(0));
+ new_shape.SetDim(1, shape.Dims(1));
+ new_shape.SetDim(3, shape.Dims(2));
+ return new_shape;
+ };
+ const Shape input1_shape = extend_shape(unextended_input1_shape);
+ const Shape output_shape = extend_shape(unextended_output_shape);
+
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_batch_size = output_shape.Dims(0);
+
+ const int32_t depth = input1_shape.Dims(3);
+ const int32_t input_width = input1_shape.Dims(2);
+ const int32_t input_height = input1_shape.Dims(1);
+ const int32_t input_batch_size = input1_shape.Dims(0);
+
+ const int32_t block_shape_height = block_shape_data[0];
+ const int32_t block_shape_width = block_shape_data[1];
+
+ const int32_t crops_top = crops_data[0];
+ const int32_t crops_left = crops_data[2];
+
+ for (int in_batch = 0; in_batch < input_batch_size; ++in_batch)
+ {
+ const int out_batch = in_batch % output_batch_size;
+ const int spatial_offset = in_batch / output_batch_size;
+
+ int in_h_start = 0;
+ int in_h_end = 0;
+ // GetIndexRange ensures start and end indices are in [0, output_height).
+ GetIndexRange(spatial_offset / block_shape_width - crops_top, block_shape_height, input_height,
+ output_height, &in_h_start, &in_h_end);
+
+ for (int in_h = in_h_start; in_h < in_h_end; ++in_h)
+ {
+ const int out_h = in_h * block_shape_height + spatial_offset / block_shape_width - crops_top;
+ assert(out_h >= 0);
+ assert(out_h < output_height);
+
+ int in_w_start = 0;
+ int in_w_end = 0;
+ // GetIndexRange ensures start and end indices are in [0, output_width).
+ GetIndexRange(spatial_offset % block_shape_width - crops_left, block_shape_width, input_width,
+ output_width, &in_w_start, &in_w_end);
+
+ for (int in_w = in_w_start; in_w < in_w_end; ++in_w)
+ {
+ const int out_w =
+ in_w * block_shape_width + spatial_offset % block_shape_width - crops_left;
+ assert(out_w >= 0);
+ assert(out_w < output_width);
+ T *out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0);
+ const T *in = input1_data + Offset(input1_shape, in_batch, in_h, in_w, 0);
+ memcpy(out, in, depth * sizeof(T));
+ }
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_BATCH_TO_SPACE_ND_H__
#ifndef __NNFW_CKER_FULLY_CONNECTED_H__
#define __NNFW_CKER_FULLY_CONNECTED_H__
+#include <ruy/context.h>
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
MatrixBatchVectorMultiplyAccumulate(weights_data, num_units, input_size, input_data, batch_size,
output_data, /*result_stride=*/1);
- // Apply activation function
- ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ if (params.activation != FusedActivationFunctionType::kNone)
+ {
+ // Apply activation function
+ ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ }
}
inline void FullyConnected(const FullyConnectedParams ¶ms, const Shape &input_shape,
const float *input_data, const Shape &filter_shape,
const int8_t *filter_data, const Shape &, const float *bias_data,
const Shape &output_shape, float *output_data,
- FCTempArena &temp_arena)
+ FCTempArena &temp_arena, ruy::Context *ruy_context)
{
int total_input_size = input_shape.FlatSize();
const int input_size = filter_shape.Dims(1);
int32_t *scratch = temp_arena.accum_scratch.data();
MatrixBatchVectorMultiplyAccumulate(filter_data, num_units, input_size, quant_data,
scaling_factors_ptr, batch_size, scratch, output_data,
- /*result_stride=*/1);
+ /*result_stride=*/1, ruy_context);
#else
MatrixBatchVectorMultiplyAccumulate(filter_data, num_units, input_size, quant_data,
scaling_factors_ptr, batch_size, output_data,
/*result_stride=*/1);
+ UNUSED_RELEASE(ruy_context);
UNUSED_RELEASE(output_shape);
#endif
// Apply activation function to floats.
- ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ if (params.activation != FusedActivationFunctionType::kNone)
+ {
+ // Apply activation function
+ ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ }
return;
}
+inline void FullyConnectedSparseWeight(const FullyConnectedParams ¶ms, const Shape &input_shape,
+ const float *input_data, const Shape &weights_shape,
+ const float *weights_data, const Shape &bias_shape,
+ const float *bias_data, const Shape &output_shape,
+ float *output_data, int w0_size, const uint16_t *w1_segments,
+ const uint16_t *w1_indices)
+{
+ UNUSED_RELEASE(params);
+ UNUSED_RELEASE(input_shape);
+
+ assert(weights_shape.DimensionsCount() == 2);
+ assert(output_shape.DimensionsCount() == 2);
+
+ const int output_dims_count = output_shape.DimensionsCount();
+ const int weights_dims_count = weights_shape.DimensionsCount();
+ const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
+ const int output_depth =
+ MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1);
+ const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
+
+ UNUSED_RELEASE(bias_shape);
+ if (bias_data)
+ {
+ VectorBatchVectorAssign(bias_data, output_depth, batches, output_data);
+ }
+ else
+ {
+ ZeroVector(output_data, batches * output_depth);
+ }
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int idx_0 = 0; idx_0 < w0_size; ++idx_0)
+ {
+ for (int pw1 = w1_segments[idx_0]; pw1 < w1_segments[idx_0 + 1]; ++pw1)
+ {
+ int idx_1 = w1_indices[pw1];
+ output_data[b * output_depth + idx_0] +=
+ weights_data[pw1] * input_data[b * accum_depth + idx_1];
+ }
+ }
+ }
+ if (params.activation != FusedActivationFunctionType::kNone)
+ {
+ // Apply activation function
+ ApplyActivationToVector(output_data, batches * output_depth, params.activation, output_data);
+ }
+}
+
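The w1_segments/w1_indices pair above is a CSR-style encoding over output rows: row idx_0 owns the stored values in [w1_segments[idx_0], w1_segments[idx_0 + 1]), and w1_indices[pw1] gives the input (column) index of each stored value. A small hand-built layout, with illustrative values only:

  // Dense weights, 2 output units x 3 inputs:
  //   [ 1  0  2 ]
  //   [ 0  0  3 ]
  // CSR-style arrays consumed by FullyConnectedSparseWeight:
  //   weights_data = { 1, 2, 3 }   // non-zero values, row by row
  //   w1_segments  = { 0, 2, 3 }   // row idx_0 owns entries [w1_segments[idx_0], w1_segments[idx_0+1])
  //   w1_indices   = { 0, 2, 2 }   // input (column) index of each stored value
  //   w0_size      = 2             // number of output rows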
} // namespace cker
} // namespace nnfw
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORFLOW_CORE_LIB_RANDOM_PHILOX_RANDOM_H_
+#define TENSORFLOW_CORE_LIB_RANDOM_PHILOX_RANDOM_H_
+
+#include <stdlib.h>
+
+#include "cker/Types.h"
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+// Function qualifiers that need to work on both CPU and GPU.
+#if defined(__CUDACC__) || defined(__HIPCC__)
+// For nvcc.
+#define PHILOX_DEVICE_FUNC __host__ __device__
+#define PHILOX_INLINE __inline__
+#else
+// For non-nvcc.
+#define PHILOX_DEVICE_FUNC
+#define PHILOX_INLINE inline
+#endif
+#define PHILOX_DEVICE_INLINE PHILOX_DEVICE_FUNC PHILOX_INLINE
+
+#include <math.h>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace random
+{
+
+// A class that represents an inline array. It can be used on both CPU and GPU,
+// and is trivially copyable between CPU and GPU.
+// Arguments:
+// T: the array element type;
+// ElementCount: the fixed size of the array;
+template <typename T, int ElementCount> class Array
+{
+public:
+ static constexpr int kElementCount = ElementCount;
+ PHILOX_DEVICE_INLINE Array()
+ {
+ for (int i = 0; i < ElementCount; ++i)
+ {
+ data_[i] = T(0);
+ }
+ }
+
+ PHILOX_DEVICE_INLINE const T &operator[](int index) const { return data_[index]; }
+
+ PHILOX_DEVICE_INLINE T &operator[](int index) { return data_[index]; }
+
+ size_t size() const { return ElementCount; }
+
+private:
+ T data_[ElementCount];
+};
+
+// A class that encapsulates all the states for a random number generator using
+// the philox_4x32_10 algorithm. Each invocation returns 128 random bits
+// in the form of four uint32 values.
+// There are multiple variants of this algorithm, we picked the 4x32_10 version
+// that is most suited for our applications.
+// Since this class is meant to be copied between CPU and GPU, it maintains
+// value semantics.
+//
+// For example: To use this class and populate an array of 1024 randoms on CPU
+// with two threads,
+//
+// void Fill(PhiloxRandom rnd, uint32* output, int start, int limit) {
+// assert(start % 4 == 0);
+// assert(limit % 4 == 0);
+// rnd.Skip(start / 4);
+// for (int i = start; i < limit; i += 4) {
+// auto sample = rnd();
+// ... copy sample[0..3] to output[i..i+3]
+// }
+// }
+//
+// PhiloxRandom rng(seed);
+// PhiloxRandom rng_copy = rng;
+// rng.Skip(1000/4);
+//
+// ... schedule Fill(rng_copy, output, 0, 512) in thread 1;
+// ... schedule Fill(rng_copy, output, 512, 1024) in thread 2;
+// ... wait for thread 1 & 2 to finish executing Fill().
+//
+// NOTE:
+// 1. PhiloxRandom is trivially copyable.
+// 2. PhiloxRandom is compilable by gcc and nvcc.
+class PhiloxRandom
+{
+public:
+ using ResultType = Array<uint32_t, 4>;
+ using ResultElementType = uint32_t;
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = 4;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 10;
+ // The type for the 64-bit key stored in the form of two 32-bit uint
+ // that are used in the diffusion process.
+ using Key = Array<uint32_t, 2>;
+
+ PHILOX_DEVICE_INLINE
+ PhiloxRandom() {}
+
+ PHILOX_DEVICE_INLINE
+ explicit PhiloxRandom(uint64_t seed)
+ {
+ key_[0] = static_cast<uint32_t>(seed);
+ key_[1] = static_cast<uint32_t>(seed >> 32);
+ }
+
+ PHILOX_DEVICE_INLINE
+ explicit PhiloxRandom(uint64_t seed_lo, uint64_t seed_hi)
+ {
+ key_[0] = static_cast<uint32_t>(seed_lo);
+ key_[1] = static_cast<uint32_t>(seed_lo >> 32);
+ counter_[2] = static_cast<uint32_t>(seed_hi);
+ counter_[3] = static_cast<uint32_t>(seed_hi >> 32);
+ }
+
+ PHILOX_DEVICE_INLINE
+ PhiloxRandom(ResultType counter, Key key) : counter_(counter), key_(key) {}
+
+ PHILOX_DEVICE_INLINE
+ ResultType const &counter() const { return counter_; }
+
+ PHILOX_DEVICE_INLINE
+ Key const &key() const { return key_; }
+
+ // Skip the specified number of samples of 128-bits in the current stream.
+ PHILOX_DEVICE_INLINE
+ void Skip(uint64_t count)
+ {
+ const uint32_t count_lo = static_cast<uint32_t>(count);
+ uint32_t count_hi = static_cast<uint32_t>(count >> 32);
+
+ counter_[0] += count_lo;
+ if (counter_[0] < count_lo)
+ {
+ ++count_hi;
+ }
+
+ counter_[1] += count_hi;
+ if (counter_[1] < count_hi)
+ {
+ if (++counter_[2] == 0)
+ {
+ ++counter_[3];
+ }
+ }
+ }
+
+ // Returns a group of four random numbers using the underlying Philox
+ // algorithm.
+ PHILOX_DEVICE_INLINE ResultType operator()()
+ {
+ ResultType counter = counter_;
+ Key key = key_;
+
+ // Run the single rounds for ten times. Manually unrolling the loop
+ // for better performance.
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+
+ SkipOne();
+
+ return counter;
+ }
+
+private:
+ // We use the same constants as recommended by the original paper.
+ static constexpr uint32_t kPhiloxW32A = 0x9E3779B9;
+ static constexpr uint32_t kPhiloxW32B = 0xBB67AE85;
+ static constexpr uint32_t kPhiloxM4x32A = 0xD2511F53;
+ static constexpr uint32_t kPhiloxM4x32B = 0xCD9E8D57;
+
+ // Helper function to skip the next sample of 128-bits in the current stream.
+ PHILOX_DEVICE_INLINE void SkipOne()
+ {
+ if (++counter_[0] == 0)
+ {
+ if (++counter_[1] == 0)
+ {
+ if (++counter_[2] == 0)
+ {
+ ++counter_[3];
+ }
+ }
+ }
+ }
+
+ // Helper function to return the lower and higher 32-bits from two 32-bit
+ // integer multiplications.
+ PHILOX_DEVICE_INLINE
+ static void MultiplyHighLow(uint32_t a, uint32_t b, uint32_t *result_low, uint32_t *result_high)
+ {
+#ifndef __CUDA_ARCH__
+ const uint64_t product = static_cast<uint64_t>(a) * b;
+ *result_low = static_cast<uint32_t>(product);
+ *result_high = static_cast<uint32_t>(product >> 32);
+#else
+ *result_low = a * b;
+ *result_high = __umulhi(a, b);
+#endif
+ }
+
+ // Helper function for a single round of the underlying Philox algorithm.
+ PHILOX_DEVICE_INLINE static ResultType ComputeSingleRound(const ResultType &counter,
+ const Key &key)
+ {
+ uint32_t lo0;
+ uint32_t hi0;
+ MultiplyHighLow(kPhiloxM4x32A, counter[0], &lo0, &hi0);
+
+ uint32_t lo1;
+ uint32_t hi1;
+ MultiplyHighLow(kPhiloxM4x32B, counter[2], &lo1, &hi1);
+
+ ResultType result;
+ result[0] = hi1 ^ counter[1] ^ key[0];
+ result[1] = lo1;
+ result[2] = hi0 ^ counter[3] ^ key[1];
+ result[3] = lo0;
+ return result;
+ }
+
+ PHILOX_DEVICE_INLINE void RaiseKey(Key *key)
+ {
+ (*key)[0] += kPhiloxW32A;
+ (*key)[1] += kPhiloxW32B;
+ }
+
+private:
+ ResultType counter_;
+ Key key_;
+};
+
+} // namespace random
+} // namespace cker
+} // namespace nnfw
+#endif // TENSORFLOW_CORE_LIB_RANDOM_PHILOX_RANDOM_H_
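A minimal sketch of driving the generator, mirroring the Fill() example in the class comment; the seed and buffer size are arbitrary:

  nnfw::cker::random::PhiloxRandom gen(/*seed=*/42);
  uint32_t buf[8];
  for (int i = 0; i < 8; i += 4)
  {
    auto sample = gen();  // one invocation yields four uint32 values
    for (int j = 0; j < 4; ++j)
    {
      buf[i + j] = sample[j];
    }
  }
  // A second consumer could copy `gen` and call Skip(n) to jump ahead n 128-bit samples.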
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_HELPER_RANDOM_DISTRIBUTIONS_H__
+#define __NNFW_CKER_HELPER_RANDOM_DISTRIBUTIONS_H__
+
+#include <string.h>
+
+#include <cmath>
+
+#include <algorithm>
+#include <type_traits>
+
+#include "cker/Types.h"
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+#include "cker/eigen/EigenSupport.h"
+#include "cker/operation/Helper/PhiloxRandom.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace random
+{
+
+// Helper function to convert a 16-bit integer to a half between [0..1).
+PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16_t x);
+// Helper function to convert a 16-bit integer to a bfloat16 between [0..1).
+// PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x);
+// Helper function to convert a 32-bit integer to a float between [0..1).
+PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32_t x);
+// Helper function to convert two 32-bit integers to a double between [0..1).
+PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32_t x0, uint32_t x1);
+
+// Computes a + b. Requires that the result is representable in the destination
+// type and that b is not maximal (i.e. b + 1 is not 0). Notably, the addend b
+// need *not* be representable in that type. (The condition on b excludes the
+// extremal case INT_MIN + UINT_MAX = INT_MAX, which this function cannot
+// compute.)
+template <typename Int>
+PHILOX_DEVICE_INLINE Int SignedAdd(Int a, typename std::make_unsigned<Int>::type b)
+{
+ // Implementation note: both b_div_2 and b - b_div_2 are positive and
+ // representable as Int.
+ auto b_div_2 = b >> 1;
+ return a + static_cast<Int>(b_div_2) + static_cast<Int>(b - b_div_2);
+}
+
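Concretely, the split avoids ever casting the full unsigned addend to the signed type. For example, with a = -5 and b = 7u:

  // b_div_2            = 7u >> 1 = 3
  // a + 3 + (7 - 3)    = -5 + 3 + 4 = 2 == -5 + 7
  // Both partial addends (3 and 4) fit in Int even when b itself exceeds Int's maximum.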
+// A class that generates uniform distribution random numbers from the
+// underlying random integer generator.
+// Arguments:
+// Generator: a generator type that returns a number of uint32 upon each
+// invocation. It needs to define kResultElementCount for the
+// sample count for each invocation, and ResultType for the
+// actual returned sample type.
+// RealType: the data type of the real numbers that will be returned by the
+// distribution. This could be either float or double for now.
+// This class is meant to be implemented through specialization. The default
+// is not defined by design.
+template <class Generator, typename RealType> class UniformDistribution;
+
+template <class Generator> class UniformDistribution<Generator, Eigen::half>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<Eigen::half, kResultElementCount> ResultType;
+ typedef Eigen::half ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = Uint16ToHalf(sample[i]); // Truncate the upper 16 bits.
+ }
+ return result;
+ }
+};
+
+template <class Generator> class UniformDistribution<Generator, float>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<float, kResultElementCount> ResultType;
+ typedef float ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = Uint32ToFloat(sample[i]);
+ }
+ return result;
+ }
+};
+
+template <class Generator> class UniformDistribution<Generator, double>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount / 2;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<double, kResultElementCount> ResultType;
+ typedef double ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = Uint64ToDouble(sample[2 * i], sample[2 * i + 1]);
+ }
+ return result;
+ }
+};
+
+template <class Generator> class UniformDistribution<Generator, int32_t>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<int32_t, kResultElementCount> ResultType;
+ typedef int32_t ResultElementType;
+
+ // Must have lo < hi
+ UniformDistribution(int32_t lo, int32_t hi)
+ : lo_(lo), range_(static_cast<uint32_t>(hi) - static_cast<uint32_t>(lo))
+ {
+ }
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = SignedAdd(lo_, sample[i] % range_);
+ }
+ return result;
+ }
+
+private:
+ // Note that lo_ is intentionally signed while range_ is intentionally
+ // unsigned. This is because hi - lo can overflow signed integers if
+ // lo < 0 < hi, but always fits in unsigned.
+ int32_t lo_;
+ uint32_t range_;
+};
+
+template <class Generator> class UniformDistribution<Generator, int64_t>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount / 2;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<int64_t, kResultElementCount> ResultType;
+ typedef int64_t ResultElementType;
+
+ // Must have lo < hi
+ UniformDistribution(int64_t lo, int64_t hi)
+ : lo_(lo), range_(static_cast<uint64_t>(hi) - static_cast<uint64_t>(lo))
+ {
+ }
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ auto bits = sample[2 * i] | static_cast<uint64_t>(sample[2 * i + 1]) << 32;
+ result[i] = SignedAdd(lo_, bits % range_);
+ }
+ return result;
+ }
+
+private:
+ // Note that lo_ is intentionally signed while range_ is intentionally
+ // unsigned. This is because hi - lo can overflow signed integers if
+ // lo < 0 < hi, but always fits in unsigned.
+ int64_t lo_;
+ uint64_t range_;
+};
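
To make the lo/range convention concrete, the following standalone sketch (illustration only, not part of the patch) maps one raw 32-bit sample into [lo, hi) the same way the integer specializations above do: the width hi - lo is computed in unsigned arithmetic so it cannot overflow, and the reduced offset is added back onto the signed lower bound.

```cpp
#include <cstdint>
#include <cstdio>

// Standalone sketch of the [lo, hi) mapping: keep lo signed, keep the range
// unsigned so hi - lo cannot overflow, then add (sample % range) back onto lo.
int32_t MapToRange(uint32_t sample, int32_t lo, int32_t hi)
{
  const uint32_t range = static_cast<uint32_t>(hi) - static_cast<uint32_t>(lo);
  const uint32_t offset = sample % range;
  // Add the unsigned offset in two halves, as SignedAdd does above.
  const uint32_t half = offset >> 1;
  return lo + static_cast<int32_t>(half) + static_cast<int32_t>(offset - half);
}

int main()
{
  // With lo < 0 < hi, hi - lo would overflow int32_t, but the unsigned range is fine.
  std::printf("%d\n", MapToRange(0x90000000u, -2000000000, 2000000000)); // value in [lo, hi)
  return 0;
}
```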
+
+// Similar to `UniformDistribution`, except that instead of generating numbers
+// in the range [low, high), it generates numbers covering the whole range of
+// the integer type.
+template <typename Generator, typename IntType> class UniformFullIntDistribution;
+
+template <typename Generator, typename IntType> class UniformFullIntDistribution32
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<IntType, kResultElementCount> ResultType;
+ typedef IntType ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = sample[i];
+ }
+ return result;
+ }
+};
+
+template <typename Generator, typename IntType> class UniformFullIntDistribution64
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount / 2;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<IntType, kResultElementCount> ResultType;
+ typedef IntType ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = sample[2 * i] | static_cast<uint64_t>(sample[2 * i + 1]) << 32;
+ }
+ return result;
+ }
+};
+
+template <typename Generator>
+class UniformFullIntDistribution<Generator, int32_t>
+ : public UniformFullIntDistribution32<Generator, int32_t>
+{
+};
+template <typename Generator>
+class UniformFullIntDistribution<Generator, uint32_t>
+ : public UniformFullIntDistribution32<Generator, uint32_t>
+{
+};
+template <typename Generator>
+class UniformFullIntDistribution<Generator, int64_t>
+ : public UniformFullIntDistribution64<Generator, int64_t>
+{
+};
+template <typename Generator>
+class UniformFullIntDistribution<Generator, uint64_t>
+ : public UniformFullIntDistribution64<Generator, uint64_t>
+{
+};
+
+// A class that adapts a generator that natively returns multiple samples per
+// invocation so that it returns a single sample at a time.
+template <class Generator> class SingleSampleAdapter
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = 1;
+ // The number of elements that will be returned by the underlying generator.
+ static constexpr int kNativeElementCount = Generator::kResultElementCount;
+ typedef typename Generator::ResultElementType ResultType;
+ typedef typename Generator::ResultElementType ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ explicit SingleSampleAdapter(Generator *gen)
+ : generator_(gen), used_result_index_(Generator::kResultElementCount)
+ {
+ }
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()()
+ {
+ if (used_result_index_ == Generator::kResultElementCount)
+ {
+ unused_results_ = (*generator_)();
+ used_result_index_ = 0;
+ }
+
+ return unused_results_[used_result_index_++];
+ }
+
+ PHILOX_DEVICE_INLINE
+ void Skip(uint64_t num_skips)
+ {
+ if (!num_skips)
+ {
+ return;
+ }
+ int num_unused_results = kNativeElementCount - used_result_index_;
+ if (num_skips <= num_unused_results)
+ {
+ used_result_index_ += num_skips;
+ return;
+ }
+ num_skips -= num_unused_results;
+ used_result_index_ = kNativeElementCount;
+ SkipFromGenerator(num_skips / kNativeElementCount);
+ num_skips = num_skips % kNativeElementCount;
+ if (num_skips)
+ {
+ unused_results_ = (*generator_)();
+ used_result_index_ = num_skips;
+ }
+ }
+
+private:
+ // This implementation iteratively skips over `num_skips` samples
+ // from `generator_`. There is an O(1) implementation for PhiloxRandom
+ // in random_distributions.cc.
+ PHILOX_DEVICE_INLINE
+ void SkipFromGenerator(uint64_t num_skips)
+ {
+ while (num_skips--)
+ {
+ (*generator_)();
+ }
+ }
+
+ Generator *generator_;
+ typename Generator::ResultType unused_results_;
+ int used_result_index_;
+};
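
A toy illustration of the adapter pattern (standalone, with a fake counter-based generator instead of PhiloxRandom): the underlying generator produces fixed-size blocks, and the adapter refills its buffer lazily while handing out one element per call.

```cpp
#include <array>
#include <cstdint>
#include <cstdio>

// Toy block generator: each invocation returns a block of 4 "random" words
// (here just a running counter), mimicking the Generator concept in the diff.
struct ToyBlockGenerator
{
  static constexpr int kResultElementCount = 4;
  using ResultElementType = uint32_t;
  using ResultType = std::array<uint32_t, kResultElementCount>;

  uint32_t next = 0;
  ResultType operator()()
  {
    ResultType r;
    for (auto &v : r) v = next++;
    return r;
  }
};

// Minimal adapter sketch: refill the cached block only when it is exhausted.
template <class Generator> struct SingleSampleSketch
{
  explicit SingleSampleSketch(Generator *g) : gen_(g), index_(Generator::kResultElementCount) {}
  typename Generator::ResultElementType operator()()
  {
    if (index_ == Generator::kResultElementCount)
    {
      block_ = (*gen_)();
      index_ = 0;
    }
    return block_[index_++];
  }
  Generator *gen_;
  typename Generator::ResultType block_;
  int index_;
};

int main()
{
  ToyBlockGenerator gen;
  SingleSampleSketch<ToyBlockGenerator> one_at_a_time(&gen);
  for (int i = 0; i < 6; ++i)
    std::printf("%u ", one_at_a_time()); // prints 0 1 2 3 4 5
  std::printf("\n");
  return 0;
}
```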
+
+// A class that generates unit normal distribution random numbers from the
+// underlying random integer generator.
+// Arguments:
+// Generator: a generator type that returns a number of uint32 upon each
+// invocation. It needs to define kResultElementCount for the
+// sample count for each invocation, and ResultType for actual
+// returned sample type.
+// RealType: the data type of the real numbers that will be returned by the
+// distribution. This could be either float or double for now.
+// This class is meant to be implemented through specialization. The default
+// is not defined by design.
+template <class Generator, typename RealType> class NormalDistribution;
+
+PHILOX_DEVICE_INLINE
+void BoxMullerFloat(uint32_t x0, uint32_t x1, float *f0, float *f1);
+
+PHILOX_DEVICE_INLINE
+void BoxMullerDouble(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, double *d0, double *d1);
+
+// Exactly like the float version, except that we convert to half afterwards;
+// since we don't have half-precision sin/cos even on GPUs, there's nothing to
+// gain from working in half internally.
+template <class Generator> class NormalDistribution<Generator, Eigen::half>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 70;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<Eigen::half, kResultElementCount> ResultType;
+ typedef Eigen::half ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; i += 2)
+ {
+ float f[2];
+ BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
+ result[i] = Eigen::half(f[0]);
+ result[i + 1] = Eigen::half(f[1]);
+ }
+ return result;
+ }
+};
+
+template <class Generator> class NormalDistribution<Generator, float>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 70;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<float, kResultElementCount> ResultType;
+ typedef float ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; i += 2)
+ {
+ BoxMullerFloat(sample[i], sample[i + 1], &result[i], &result[i + 1]);
+ }
+ return result;
+ }
+};
+
+template <class Generator> class NormalDistribution<Generator, double>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount / 2;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 70;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<double, kResultElementCount> ResultType;
+ typedef double ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; i += 2)
+ {
+ const int i2 = 2 * i;
+ BoxMullerDouble(sample[i2], sample[i2 + 1], sample[i2 + 2], sample[i2 + 3], &result[i],
+ &result[i + 1]);
+ }
+ return result;
+ }
+};
+
+// A class that returns standard normal distribution samples truncated to
+// [-kTruncateValue, kTruncateValue].
+// Arguments:
+// Generator: a generator type that returns a number of uint32 upon each
+// invocation. It needs to define kResultElementCount for the
+// sample count for each invocation, and ResultType for actual
+// returned sample type.
+// RealType: the data type of the real numbers that will be returned by the
+// distribution. This could be either float or double for now.
+// This class is meant to be implemented through specialization. The default
+// is not defined by design.
+template <class SingleSampleGenerator, typename RealType> class TruncatedNormalDistribution;
+
+// Exactly like the float version, except that we convert to half afterwards;
+// since we don't have half-precision sin/cos even on GPUs, there's nothing to
+// gain from working in half internally.
+template <class SingleSampleGenerator>
+class TruncatedNormalDistribution<SingleSampleGenerator, Eigen::half>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 90;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = true;
+ // The threshold where the normal distribution is truncated.
+ const float kTruncateValue = 2.0f;
+
+ typedef Array<Eigen::half, kResultElementCount> ResultType;
+ typedef Eigen::half ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(SingleSampleGenerator *gen)
+ {
+ ResultType results;
+ int index = 0;
+ while (true)
+ {
+ // Repeatedly take samples from the normal distribution, until we have
+ // the desired number of elements that fall within the pre-defined cutoff
+ // threshold.
+ const uint32_t x0 = (*gen)();
+ const uint32_t x1 = (*gen)();
+ float f[2];
+ BoxMullerFloat(x0, x1, &f[0], &f[1]);
+
+ if (Eigen::numext::abs(f[0]) < kTruncateValue)
+ {
+ results[index++] = Eigen::half(f[0]);
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ if (Eigen::numext::abs(f[1]) < kTruncateValue)
+ {
+ results[index++] = Eigen::half(f[1]);
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ }
+ }
+};
+
+// Partial specialization for float.
+template <class SingleSampleGenerator>
+class TruncatedNormalDistribution<SingleSampleGenerator, float>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 90;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = true;
+ // The threshold where the normal distribution is truncated.
+ const float kTruncateValue = 2.0f;
+
+ typedef Array<float, kResultElementCount> ResultType;
+ typedef float ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(SingleSampleGenerator *gen)
+ {
+ ResultType results;
+ int index = 0;
+ while (true)
+ {
+ // Repeatedly take samples from the normal distribution, until we have
+ // the desired number of elements that fall within the pre-defined cutoff
+ // threshold.
+ const uint32_t x0 = (*gen)();
+ const uint32_t x1 = (*gen)();
+ float f[2];
+ BoxMullerFloat(x0, x1, &f[0], &f[1]);
+
+ if (Eigen::numext::abs(f[0]) < kTruncateValue)
+ {
+ results[index++] = f[0];
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ if (Eigen::numext::abs(f[1]) < kTruncateValue)
+ {
+ results[index++] = f[1];
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ }
+ }
+};
+
+// Partial specialization for double.
+template <class SingleSampleGenerator>
+class TruncatedNormalDistribution<SingleSampleGenerator, double>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = (SingleSampleGenerator::kNativeElementCount > 1)
+ ? SingleSampleGenerator::kNativeElementCount / 2
+ : 1;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 90;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = true;
+ typedef Array<double, kResultElementCount> ResultType;
+ typedef double ResultElementType;
+ const double kTruncateValue = 2.0;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(SingleSampleGenerator *gen)
+ {
+ ResultType results;
+ int index = 0;
+ while (1)
+ {
+ const uint32_t x0 = (*gen)();
+ const uint32_t x1 = (*gen)();
+ const uint32_t x2 = (*gen)();
+ const uint32_t x3 = (*gen)();
+ double d[2];
+ BoxMullerDouble(x0, x1, x2, x3, &d[0], &d[1]);
+
+ if (Eigen::numext::abs(d[0]) < kTruncateValue)
+ {
+ results[index++] = d[0];
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ if (Eigen::numext::abs(d[1]) < kTruncateValue)
+ {
+ results[index++] = d[1];
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ }
+ }
+};
+
+// Helper function to convert two 32-bit uniform integers to two floats
+// under the unit normal distribution.
+PHILOX_DEVICE_INLINE
+void BoxMullerFloat(uint32_t x0, uint32_t x1, float *f0, float *f1)
+{
+ // This function implements the Box-Muller transform:
+ // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
+ // Do not send a really small number to log().
+ // We cannot mark "epsilon" as "static const" because NVCC would complain
+ const float epsilon = 1.0e-7f;
+ float u1 = Uint32ToFloat(x0);
+ if (u1 < epsilon)
+ {
+ u1 = epsilon;
+ }
+ const float v1 = 2.0f * M_PI * Uint32ToFloat(x1);
+ const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1));
+#if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
+ *f0 = Eigen::numext::sin(v1);
+ *f1 = Eigen::numext::cos(v1);
+#else
+ sincosf(v1, f0, f1);
+#endif
+ *f0 *= u2;
+ *f1 *= u2;
+}
+
+// Helper function to convert four 32-bit uniform integers to two doubles
+// under the unit normal distribution.
+PHILOX_DEVICE_INLINE
+void BoxMullerDouble(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, double *d0, double *d1)
+{
+ // This function implements the Box-Muller transform:
+ // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
+ // Do not send a really small number to log().
+ // We cannot mark "epsilon" as "static const" because NVCC would complain
+ const double epsilon = 1.0e-7;
+ double u1 = Uint64ToDouble(x0, x1);
+ if (u1 < epsilon)
+ {
+ u1 = epsilon;
+ }
+ const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3);
+ const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1));
+#if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
+ *d0 = Eigen::numext::sin(v1);
+ *d1 = Eigen::numext::cos(v1);
+#else
+ sincos(v1, d0, d1);
+#endif
+ *d0 *= u2;
+ *d1 *= u2;
+}
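
For reference, both helpers above implement the basic Box-Muller transform. With $u_1$ and $u_2$ drawn uniformly from $(0, 1]$, two independent standard normal values are

$$ z_0 = \sqrt{-2\ln u_1}\,\sin(2\pi u_2), \qquad z_1 = \sqrt{-2\ln u_1}\,\cos(2\pi u_2), $$

which is why the code clamps $u_1$ to a small epsilon before taking the logarithm.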
+
+// Helper function to convert a 16-bit integer to a half between [0..1).
+PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16_t x)
+{
+ // IEEE754 halfs are formatted as follows (MSB first):
+ // sign(1) exponent(5) mantissa(10)
+ // Conceptually construct the following:
+ // sign == 0
+ // exponent == 15 -- an excess 15 representation of a zero exponent
+ // mantissa == 10 random bits
+ const uint16_t man = x & 0x3ffu; // 10 bit mantissa
+ const uint16_t exp = static_cast<uint16_t>(15);
+ const uint16_t val = (exp << 10) | man;
+
+ Eigen::half result;
+ result.x = val;
+ return result - Eigen::half(1.0);
+}
+
+// Helper function to convert a 32-bit integer to a float between [0..1).
+PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32_t x)
+{
+ // IEEE754 floats are formatted as follows (MSB first):
+ // sign(1) exponent(8) mantissa(23)
+ // Conceptually construct the following:
+ // sign == 0
+ // exponent == 127 -- an excess 127 representation of a zero exponent
+ // mantissa == 23 random bits
+ const uint32_t man = x & 0x7fffffu; // 23 bit mantissa
+ const uint32_t exp = static_cast<uint32_t>(127);
+ const uint32_t val = (exp << 23) | man;
+
+ // Assumes that endian-ness is same for float and uint32.
+ float result;
+ memcpy(&result, &val, sizeof(val));
+ return result - 1.0f;
+}
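
The same bit trick can be checked in isolation. This standalone sketch (illustration only, not part of the patch) rebuilds the float from the bit pattern: 23 random mantissa bits with the exponent forced to 127 give a value in [1, 2), and subtracting 1 lands in [0, 1).

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Standalone sketch of the Uint32ToFloat bit trick.
float BitsToUnitFloat(uint32_t x)
{
  const uint32_t val = (127u << 23) | (x & 0x7fffffu);
  float result;
  std::memcpy(&result, &val, sizeof(val)); // assumes IEEE754 float, same endianness
  return result - 1.0f;
}

int main()
{
  std::printf("%f %f %f\n",
              BitsToUnitFloat(0u),        // 0.0
              BitsToUnitFloat(0x400000u), // 0.5
              BitsToUnitFloat(0x7fffffu)); // just below 1.0
  return 0;
}
```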
+
+// Helper function to convert two 32-bit integers to a double between [0..1).
+PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32_t x0, uint32_t x1)
+{
+ // IEEE754 doubles are formatted as follows (MSB first):
+ // sign(1) exponent(11) mantissa(52)
+ // Conceptually construct the following:
+ // sign == 0
+ // exponent == 1023 -- an excess 1023 representation of a zero exponent
+ // mantissa == 52 random bits
+ const uint32_t mhi = x0 & 0xfffffu; // upper 20 bits of mantissa
+ const uint32_t mlo = x1; // lower 32 bits of mantissa
+ const uint64_t man = (static_cast<uint64_t>(mhi) << 32) | mlo; // mantissa
+ const uint64_t exp = static_cast<uint64_t>(1023);
+ const uint64_t val = (exp << 52) | man;
+ // Assumes that endian-ness is same for double and uint64.
+ double result;
+ memcpy(&result, &val, sizeof(val));
+ return result - 1.0;
+}
+
+} // namespace random
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_HELPER_RANDOM_DISTRIBUTIONS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_HELPER_RANDOM_OP_H__
+#define __NNFW_CKER_HELPER_RANDOM_OP_H__
+
+#include "cker/Types.h"
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+#include "cker/operation/Helper/RandomDistributions.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+namespace functor
+{
+
+template <typename Device, class Distribution> struct FillPhiloxRandom;
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+// Declares the partially CPU-specialized functor struct.
+//
+// NOTE: Due to inlining done by the compiler, you may need to add
+// explicit instantiation of the functor in random_op.cc. See example
+// functor::FillPhiloxRandom<CPUDevice, random::UniformDistribution>.
+template <class Distribution> struct FillPhiloxRandom<CPUDevice, Distribution>
+{
+ void operator()(random::PhiloxRandom gen, typename Distribution::ResultElementType *data,
+ int64_t size, Distribution dist);
+};
+
+} // namespace functor
+} // namespace cker
+} // namespace nnfw
+#endif // __NNFW_CKER_HELPER_RANDOM_OP_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_HELPER_RANDOM_OP_CPU_H__
+#define __NNFW_CKER_HELPER_RANDOM_OP_CPU_H__
+
+#define EIGEN_USE_THREADS
+
+#include <algorithm>
+#include <cmath>
+#include <memory>
+
+#include "cker/Types.h"
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+#include "cker/eigen/EigenSupport.h"
+
+#include "cker/operation/Helper/PhiloxRandom.h"
+#include "cker/operation/Helper/RandomOp.h"
+#include "cker/operation/Helper/RandomDistributions.h"
+
+#if EIGEN_COMP_GNUC && __cplusplus > 199711L
+#define DISABLE_FLOAT_EQUALITY_WARNING \
+ _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
+#define ENABLE_FLOAT_EQUALITY_WARNING _Pragma("GCC diagnostic pop")
+#else
+#define DISABLE_FLOAT_EQUALITY_WARNING
+#define ENABLE_FLOAT_EQUALITY_WARNING
+#endif
+
+namespace nnfw
+{
+namespace cker
+{
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+namespace functor
+{
+using random::PhiloxRandom;
+using random::SingleSampleAdapter;
+
+// The default implementation of the functor, which should never be invoked.
+// But we still need to provide an implementation for now for the linker to work,
+// since we do not support all the distributions yet.
+template <typename Device, class Distribution> struct FillPhiloxRandom
+{
+ typedef typename Distribution::ResultElementType T;
+ void operator()() {}
+};
+
+// A class to fill a specified range of random groups
+template <class Distribution, bool VariableSamplesPerOutput> struct FillPhiloxRandomTask;
+
+// Specialization for distribution that takes a fixed number of samples for
+// each output.
+template <class Distribution> struct FillPhiloxRandomTask<Distribution, false>
+{
+ typedef typename Distribution::ResultElementType T;
+ static void Run(random::PhiloxRandom gen, T *data, int64_t size, Distribution dist)
+ {
+ const int kGroupSize = Distribution::kResultElementCount;
+ gen.Skip(0);
+ int64_t offset = 0;
+
+ // First fill all the full-size groups
+ int64_t limit_group_full = size / kGroupSize;
+ for (int64_t index = 0; index < limit_group_full; ++index)
+ {
+ auto samples = dist(&gen);
+ std::copy(&samples[0], &samples[0] + kGroupSize, data + offset);
+ offset += kGroupSize;
+ }
+
+ int64_t remaining_size = size - limit_group_full * kGroupSize;
+
+ // If there are any remaining elements that need to be filled, process them
+ if (remaining_size > 0)
+ {
+ auto samples = dist(&gen);
+ std::copy(&samples[0], &samples[0] + remaining_size, data + offset);
+ }
+ }
+};
+
+// Specialization for distribution that takes a variable number of samples for
+// each output. This will be slower due to the generality.
+template <class Distribution> struct FillPhiloxRandomTask<Distribution, true>
+{
+ typedef typename Distribution::ResultElementType T;
+ static constexpr int64_t kReservedSamplesPerOutput = 256;
+
+ static void Run(random::PhiloxRandom base_gen, T *data, int64_t size, Distribution dist)
+ {
+ const int kGroupSize = Distribution::kResultElementCount;
+ static const int kGeneratorSkipPerOutputGroup =
+ kGroupSize * kReservedSamplesPerOutput / PhiloxRandom::kResultElementCount;
+
+ int64_t offset = 0;
+
+ // First fill all the full-size groups
+ int64_t limit_group_full = size / kGroupSize;
+ int64_t group_index;
+ for (group_index = 0; group_index < limit_group_full; ++group_index)
+ {
+ // Reset the generator to the beginning of the output group region
+ // This is necessary if we want the results to be independent of order
+ // of work
+ PhiloxRandom gen = base_gen;
+ gen.Skip(group_index * kGeneratorSkipPerOutputGroup);
+ SingleSampleAdapter<PhiloxRandom> single_samples(&gen);
+
+ auto samples = dist(&single_samples);
+ std::copy(&samples[0], &samples[0] + kGroupSize, data + offset);
+ offset += kGroupSize;
+ }
+
+ int64_t remaining_size = size - limit_group_full * kGroupSize;
+ // If there are any remaining elements that need to be filled, process them
+ if (remaining_size > 0)
+ {
+ PhiloxRandom gen = base_gen;
+ gen.Skip(group_index * kGeneratorSkipPerOutputGroup);
+ SingleSampleAdapter<PhiloxRandom> single_samples(&gen);
+
+ auto samples = dist(&single_samples);
+ std::copy(&samples[0], &samples[0] + remaining_size, data + offset);
+ }
+ }
+};
+
+// Partial specialization for CPU to fill the entire region with randoms
+// It splits the work into several tasks and runs them in parallel
+template <class Distribution>
+void FillPhiloxRandom<CPUDevice, Distribution>::
+operator()(random::PhiloxRandom gen, typename Distribution::ResultElementType *data, int64_t size,
+ Distribution dist)
+{
+ FillPhiloxRandomTask<Distribution, Distribution::kVariableSamplesPerOutput>::Run(gen, data, size,
+ dist);
+}
+
+} // namespace functor
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_HELPER_RANDOM_OP_CPU_H__
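
The fixed-sample-count path above boils down to a "full groups plus a tail" copy loop. A standalone sketch of that pattern with a toy group producer (illustration only, not part of the patch):

```cpp
#include <algorithm>
#include <array>
#include <cstdio>
#include <vector>

// Standalone sketch of the fill pattern used by FillPhiloxRandomTask: a
// producer emits fixed-size groups, full groups are copied directly, and a
// final partial group covers the remaining elements.
int main()
{
  constexpr int kGroupSize = 4;
  int next = 0;
  auto make_group = [&next]() {
    std::array<int, kGroupSize> g;
    for (auto &v : g) v = next++;
    return g;
  };

  std::vector<int> data(10); // size is not a multiple of kGroupSize
  int64_t offset = 0;
  const int64_t full_groups = static_cast<int64_t>(data.size()) / kGroupSize;
  for (int64_t i = 0; i < full_groups; ++i)
  {
    auto g = make_group();
    std::copy(g.begin(), g.end(), data.begin() + offset);
    offset += kGroupSize;
  }
  const int64_t remaining = static_cast<int64_t>(data.size()) - offset;
  if (remaining > 0)
  {
    auto g = make_group();
    std::copy(g.begin(), g.begin() + remaining, data.begin() + offset);
  }
  for (int v : data) std::printf("%d ", v); // 0 1 2 ... 9
  std::printf("\n");
  return 0;
}
```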
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_L2NORMALIZE_H__
+#define __NNFW_CKER_L2NORMALIZE_H__
+
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+void L2NormalizeFloat32(const Shape &input_shape, const float *input_data,
+ const Shape &output_shape, float *output_data)
+{
+ float epsilon = 1e-6;
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ for (int i = 0; i < outer_size; ++i)
+ {
+ float squared_l2_norm = 0;
+ for (int c = 0; c < depth; ++c)
+ {
+ const float val = input_data[c];
+ squared_l2_norm += val * val;
+ }
+ float l2_norm = std::sqrt(squared_l2_norm);
+ l2_norm = std::max(l2_norm, epsilon);
+ for (int c = 0; c < depth; ++c)
+ {
+ *output_data = *input_data / l2_norm;
+ ++output_data;
+ ++input_data;
+ }
+ }
+}
+
+void L2NormalizeQuant8(L2NormParams &params, const Shape &input_shape, const uint8_t *input_data,
+ const Shape &output_shape, uint8_t *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int32_t input_zero_point = params.input_zero_point;
+
+ for (int i = 0; i < outer_size; ++i)
+ {
+ int32_t square_l2_norm = 0;
+ for (int c = 0; c < depth; c++)
+ {
+ // Note that input_data advances by depth in the second pass below.
+ int32_t diff = input_data[c] - input_zero_point;
+ square_l2_norm += diff * diff;
+ }
+ int32_t inv_l2norm_multiplier;
+ int inv_l2norm_shift;
+ GetInvSqrtQuantizedMultiplierExp(square_l2_norm, -1, &inv_l2norm_multiplier, &inv_l2norm_shift);
+ for (int c = 0; c < depth; c++)
+ {
+ int32_t diff = *input_data - input_zero_point;
+ int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
+ int32_t unclamped_output_val = 128 + rescaled_diff;
+ int32_t output_val = std::min(static_cast<int32_t>(255),
+ std::max(static_cast<int32_t>(0), unclamped_output_val));
+ *output_data = static_cast<uint8_t>(output_val);
+ ++input_data;
+ ++output_data;
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_L2NORMALIZE_H__
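
The float path above normalizes each innermost row by max(||row||_2, epsilon). A small standalone numeric sketch (illustration only, not part of the patch):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Standalone sketch of per-row L2 normalization: divide each element of the
// innermost dimension by the clamped Euclidean norm of that row.
int main()
{
  std::vector<float> row = {3.0f, 4.0f};
  float squared = 0.0f;
  for (float v : row) squared += v * v;
  const float norm = std::max(std::sqrt(squared), 1e-6f);
  for (float &v : row) v /= norm;
  std::printf("%f %f\n", row[0], row[1]); // 0.6 0.8
  return 0;
}
```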
inline void Logistic(const Shape &input_shape, const float *input_data, const Shape &output_shape,
float *output_data)
{
-#ifdef __aarch64__
auto input_map = MapAsVector(input_data, input_shape);
auto output_map = MapAsVector(output_data, output_shape);
output_map.array() = input_map.array().unaryExpr(Eigen::internal::scalar_logistic_op<float>());
-#else
- // Note, this can be done using TANH: (1/2) + (1/2) * TANH(x/2)
- const int size = MatchingFlatSize(input_shape, output_shape);
- for (int i = 0; i < size; i++)
- {
- output_data[i] = 1.f / (1.f + std::exp(-input_data[i]));
- }
-#endif
}
} // namespace cker
{
auto last_dim = input_shape.DimensionsCount() - 1;
- T batch_num = 0;
- for (int dim = 0; dim < last_dim - 2; dim++)
+ T batch_num = 1;
+ for (int dim = 0; dim < input_shape.DimensionsCount() - 2; dim++)
{
- batch_num += input_shape.Dims(dim);
+ batch_num *= input_shape.Dims(dim);
}
const T row_num = input_shape.Dims(last_dim - 1);
{
namespace cker
{
+template <typename T>
inline void Pad(const int32_t *padding_data, int32_t pad_rank, const Shape &input_shape,
- const float *input_data, const Shape &output_shape, float *output_data,
- const float *constant_value_data)
+ const T *input_data, const Shape &output_shape, T *output_data,
+ const T *constant_value_data)
{
// Note, this is pad with mode=`CONSTANT`: it doesn't support `REFLECT` and `SYMMETRIC`
// TODO: come up with more subtle solution that uses subtensors like arm compute
/** List of padding information */
using PaddingList = std::vector<PaddingInfo>;
- auto constant_value = constant_value_data ? *constant_value_data : 0;
+ const T constant_value = constant_value_data ? *constant_value_data : 0;
assert(output_shape.DimensionsCount() == input_shape.DimensionsCount());
PaddingList padding_list(pad_rank);
{
const int32_t in_row_len = input_shape.Dims(0);
std::fill_n(output_data, padding_list[0].first, constant_value);
- std::memcpy(output_data + padding_list[0].first, input_data, in_row_len * sizeof(float));
+ std::memcpy(output_data + padding_list[0].first, input_data, in_row_len * sizeof(T));
std::fill_n(output_data + padding_list[0].first + in_row_len, padding_list[0].second,
constant_value);
break;
out_offset += padding_list[1].first;
// copy a row of input data
- memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(float));
+ memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(T));
out_offset += in_row_len;
out_offset += padding_list[2].first;
// copy a row of input data
- memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(float));
+ memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(T));
out_offset += in_row_len;
out_c_offset += padding_list[3].first;
// copy a row of input data
- memcpy(output_data + out_c_offset, input_data + in_offset, in_row_len * sizeof(float));
+ memcpy(output_data + out_c_offset, input_data + in_offset, in_row_len * sizeof(T));
out_c_offset += in_row_len;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_QUANTIZE_H__
+#define __NNFW_CKER_QUANTIZE_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+#include <algorithm>
+#include <cmath>
+#include <limits>
+namespace nnfw
+{
+namespace cker
+{
+template <typename InputT, typename OutputT>
+inline void Quantize(const Shape &input_shape, const InputT *input_data, const Shape &output_shape,
+ OutputT *output_data, const float output_scale, const int32_t output_offset)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ int min_val = std::numeric_limits<OutputT>::min();
+ int max_val = std::numeric_limits<OutputT>::max();
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(input_data[i] / output_scale)) + output_offset;
+ int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+ output_data[i] = clamped;
+ }
+}
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_QUANTIZE_H__
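
The affine quantization above is q = clamp(round(x / scale) + zero_point, qmin, qmax). A standalone sketch with made-up scale and zero-point values (illustration only, not part of the patch):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Standalone sketch of the affine quantization step for a uint8 output range.
int main()
{
  const float scale = 0.5f;
  const int32_t zero_point = 10;
  const float inputs[] = {-7.3f, 0.0f, 200.0f};
  for (float x : inputs)
  {
    int32_t unclamped = static_cast<int32_t>(std::round(x / scale)) + zero_point;
    int32_t clamped = std::min(std::max(unclamped, 0), 255);
    std::printf("%.1f -> %d\n", x, clamped); // -7.3 -> 0, 0.0 -> 10, 200.0 -> 255
  }
  return 0;
}
```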
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_RELU6_H__
+#define __NNFW_CKER_RELU6_H__
+
+#include "cker/Shape.h"
+#include "cker/eigen/Utils.h"
+
+#include <cmath>
+#include <Eigen/Core>
+
+namespace nnfw
+{
+namespace cker
+{
+
+inline void ReLU6(const Shape &input_shape, const float *input_data, float *output_data)
+{
+ int size = input_shape.FlatSize();
+
+ for (int i = 0; i < size; ++i)
+ {
+ if (input_data[i] <= 0)
+ {
+ output_data[i] = 0;
+ }
+ else if (input_data[i] > 6.0)
+ {
+ output_data[i] = 6.0;
+ }
+ else
+ {
+ output_data[i] = input_data[i];
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_RELU6_H__
num_resolved_axis, temp_index_data(), reducer, output_data);
}
+ // Computes the mean of elements across dimensions given in axis.
+ // It does so in two stages: first it computes the sum of elements along the axis,
+ // then it divides the sum by the number of elements in the axis, for quantized values.
+ template <typename T, typename U>
+ inline bool QuantizedMeanOrSum(const T *input_data, int32_t input_zero_point, float input_scale,
+ const Shape &input_shape, T *output_data,
+ int32_t output_zero_point, float output_scale,
+ const Shape &output_shape, const std::vector<int> &axes,
+ bool /*keep_dims*/, U *temp_sum, bool compute_sum,
+ U reducer(const U current, const T in))
+ {
+ // Reset output data.
+ size_t num_outputs = 1;
+ for (int idx = 0; idx < output_shape.DimensionsCount(); ++idx)
+ {
+ size_t current = static_cast<size_t>(output_shape.Dims(idx));
+ // Overflow prevention.
+ if (num_outputs > std::numeric_limits<size_t>::max() / current)
+ {
+ return false;
+ }
+ num_outputs *= current;
+ }
+ for (size_t idx = 0; idx < num_outputs; ++idx)
+ {
+ output_data[idx] = T();
+ temp_sum[idx] = U();
+ }
+
+ // Resolve axis.
+ int num_resolved_axis = 0;
+ if (!ResolveAxis(input_shape.DimensionsCount(), axes, resolved_axis_data(), &num_resolved_axis))
+ {
+ return false;
+ }
+
+ if (!ReduceImpl<T, U>(input_data, input_shape, output_shape, resolved_axis_data(),
+ num_resolved_axis, temp_index_data(), reducer, temp_sum))
+ {
+ return false;
+ }
+
+ // Calculate mean by dividing output_data by num of aggregated element.
+ U num_elements_in_axis = 1;
+ for (int idx = 0; idx < num_resolved_axis; ++idx)
+ {
+ size_t current = static_cast<size_t>(input_shape.Dims(resolved_axis_data()[idx]));
+ // Overflow prevention.
+ if (current > static_cast<size_t>(std::numeric_limits<U>::max() / num_elements_in_axis))
+ {
+ return false;
+ }
+ num_elements_in_axis *= current;
+ }
+
+ if (num_elements_in_axis > 0)
+ {
+ const float scale = input_scale / output_scale;
+ if (compute_sum)
+ {
+ // TODO(b/116341117): Eliminate float and do this completely in 8bit.
+ const float bias = -input_zero_point * scale * num_elements_in_axis + 0.5f;
+ for (size_t idx = 0; idx < num_outputs; ++idx)
+ {
+ const U value =
+ static_cast<U>(std::round(temp_sum[idx] * scale + bias)) + output_zero_point;
+ output_data[idx] = static_cast<T>(value);
+ }
+ }
+ else
+ {
+ const float bias = -input_zero_point * scale + 0.5f;
+ for (size_t idx = 0; idx < num_outputs; ++idx)
+ {
+ float float_mean =
+ static_cast<float>(temp_sum[idx]) / static_cast<float>(num_elements_in_axis);
+ float result = std::min(std::round(float_mean * scale + bias) + output_zero_point,
+ static_cast<float>(std::numeric_limits<T>::max()));
+ result = std::max(result, static_cast<float>(std::numeric_limits<T>::min()));
+ output_data[idx] = static_cast<T>(result);
+ }
+ }
+ }
+ return true;
+ }
+
inline int32_t *resolved_axis_data(void)
{
return _resolved_axis.size() ? _resolved_axis.data() : _resolved_axis_small;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_RESIZEBILINEAR_H__
+#define __NNFW_CKER_RESIZEBILINEAR_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+
+inline void ResizeBilinearKernel2x2(int32_t x0, int32_t x1, int32_t y0, int32_t y1, int32_t x,
+ int32_t y, int32_t depth, int32_t batch,
+ const Shape &input_shape, const float *input_data,
+ const Shape &output_shape, float *output_data)
+{
+ const int32_t input_width = input_shape.Dims(2);
+ const int32_t output_width = output_shape.Dims(2);
+
+ const int32_t input_x_offset = (x1 - x0) * depth;
+ const int32_t input_y_offset = (y1 - y0) * depth * input_width;
+ const int32_t output_x_offset = depth;
+ const int32_t output_y_offset = depth * output_width;
+
+ for (int ch = 0; ch < depth; ch++)
+ {
+ const int32_t input_offset = Offset(input_shape, batch, y0, x0, ch);
+
+ float x0y0 = input_data[input_offset];
+ float x1y0 = input_data[input_offset + input_x_offset];
+ float x0y1 = input_data[input_offset + input_y_offset];
+ float x1y1 = input_data[input_offset + input_x_offset + input_y_offset];
+
+ // Top left corner.
+ const int32_t output_offset = Offset(output_shape, batch, y, x, ch);
+ output_data[output_offset] = x0y0;
+
+ // Top right corner.
+ output_data[output_offset + output_x_offset] = (x0y0 + x1y0) / 2;
+
+ // Bottom left corner.
+ float output = (x0y0 + x0y1) / 2;
+ output_data[output_offset + output_y_offset] = output;
+
+ // Bottom right corner.
+ output_data[output_offset + output_x_offset + output_y_offset] =
+ (output + ((x1y0 + x1y1) / 2)) / 2;
+ }
+}
+
+inline void ResizeBilinear2x2(int32_t batches, int32_t input_height, int32_t input_width,
+ int32_t depth, int32_t output_height, int32_t output_width,
+ const Shape &input_shape, const float *input_data,
+ const Shape &output_shape, float *output_data)
+{
+ for (int b = 0; b < batches; b++)
+ {
+ for (int y0 = 0, y = 0; y <= output_height - 2; y += 2, y0++)
+ {
+ for (int x0 = 0, x = 0; x <= output_width - 2; x += 2, x0++)
+ {
+ int32_t x1 = std::min(x0 + 1, input_width - 1);
+ int32_t y1 = std::min(y0 + 1, input_height - 1);
+ ResizeBilinearKernel2x2(x0, x1, y0, y1, x, y, depth, b, input_shape, input_data,
+ output_shape, output_data);
+ }
+ }
+ }
+}
+
+inline void ResizeBilinearKernel(const float *input_ptr, int32_t depth, float scale,
+ float *output_ptr)
+{
+ for (int32_t i = 0; i < depth; i++)
+ {
+ *output_ptr += *input_ptr * scale;
+ output_ptr++;
+ input_ptr++;
+ }
+}
+
+inline void ComputeInterpolationValues(const float value, const float scale,
+ const bool half_pixel_centers, int32_t input_size,
+ float *scaled_value, int32_t *lower_bound,
+ int32_t *upper_bound)
+{
+ if (half_pixel_centers)
+ {
+ *scaled_value = (value + 0.5f) * scale - 0.5f;
+ }
+ else
+ {
+ *scaled_value = value * scale;
+ }
+ float scaled_value_floor = std::floor(*scaled_value);
+ *lower_bound = std::max(static_cast<int32_t>(scaled_value_floor), static_cast<int32_t>(0));
+ *upper_bound = std::min(static_cast<int32_t>(std::ceil(*scaled_value)), input_size - 1);
+}
+
+inline void ResizeBilinearGeneric(int32_t batches, int32_t input_height, int32_t input_width,
+ int32_t depth, int32_t output_height, int32_t output_width,
+ float height_scale, float width_scale, const Shape &input_shape,
+ const float *input_data, float *output_data,
+ const bool half_pixel_centers)
+{
+ memset(output_data, 0, batches * output_height * output_width * depth * sizeof(float));
+
+ int32_t output_offset = 0;
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int y = 0; y < output_height; ++y)
+ {
+ float input_y;
+ int32_t y0, y1;
+ ComputeInterpolationValues(y, height_scale, half_pixel_centers, input_height, &input_y, &y0,
+ &y1);
+ for (int x = 0; x < output_width; ++x)
+ {
+ float input_x;
+ int32_t x0, x1;
+ ComputeInterpolationValues(x, width_scale, half_pixel_centers, input_width, &input_x, &x0,
+ &x1);
+ float *output_ptr = &output_data[output_offset];
+
+ // Run kernel on the 4 corners of the bilinear resize algorithm.
+ int32_t input_offset = Offset(input_shape, b, y0, x0, 0);
+ float scale = (1 - (input_y - y0)) * (1 - (input_x - x0));
+ const float *input_ptr = &input_data[input_offset];
+ ResizeBilinearKernel(input_ptr, depth, scale, output_ptr);
+
+ input_offset = Offset(input_shape, b, y0, x1, 0);
+ scale = (1 - (input_y - y0)) * (input_x - x0);
+ input_ptr = &input_data[input_offset];
+ ResizeBilinearKernel(input_ptr, depth, scale, output_ptr);
+
+ input_offset = Offset(input_shape, b, y1, x0, 0);
+ scale = (input_y - y0) * (1 - (input_x - x0));
+ input_ptr = &input_data[input_offset];
+ ResizeBilinearKernel(input_ptr, depth, scale, output_ptr);
+
+ input_offset = Offset(input_shape, b, y1, x1, 0);
+ scale = (input_y - y0) * (input_x - x0);
+ input_ptr = &input_data[input_offset];
+ ResizeBilinearKernel(input_ptr, depth, scale, output_ptr);
+
+ output_offset += depth;
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void ResizeBilinearGenericSmallChannel(int32_t batches, int32_t input_height,
+ int32_t input_width, int32_t depth,
+ int32_t output_height, int32_t output_width,
+ float height_scale, float width_scale,
+ const Shape &input_shape, const T *input_data,
+ T *output_data, const bool half_pixel_centers)
+{
+ T *output_ptr = &output_data[0];
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int y = 0; y < output_height; ++y)
+ {
+ float input_y;
+ int32_t y0, y1;
+ ComputeInterpolationValues(y, height_scale, half_pixel_centers, input_height, &input_y, &y0,
+ &y1);
+ for (int x = 0; x < output_width; ++x)
+ {
+ float input_x;
+ int32_t x0, x1;
+ ComputeInterpolationValues(x, width_scale, half_pixel_centers, input_width, &input_x, &x0,
+ &x1);
+
+ int32_t input_offset[4] = {
+ Offset(input_shape, b, y0, x0, 0), Offset(input_shape, b, y0, x1, 0),
+ Offset(input_shape, b, y1, x0, 0), Offset(input_shape, b, y1, x1, 0)};
+ float scale[4] = {(1 - (input_y - y0)) * (1 - (input_x - x0)),
+ (1 - (input_y - y0)) * (input_x - x0),
+ (input_y - y0) * (1 - (input_x - x0)), (input_y - y0) * (input_x - x0)};
+
+ for (int d = 0; d < depth; d++)
+ {
+ const T *input_ptr = &input_data[d];
+ *output_ptr++ = static_cast<T>(
+ input_ptr[input_offset[0]] * scale[0] + input_ptr[input_offset[1]] * scale[1] +
+ input_ptr[input_offset[2]] * scale[2] + input_ptr[input_offset[3]] * scale[3]);
+ }
+ }
+ }
+ }
+}
+
+void ResizeBilinear(ResizeBilinearParams &params, const Shape &input_shape, const float *input_data,
+ const Shape &output_shape, float *output_data)
+{
+ int32_t batches = static_cast<int32_t>(MatchingDim(input_shape, 0, output_shape, 0));
+ int32_t input_height = input_shape.Dims(1);
+ int32_t input_width = input_shape.Dims(2);
+ int32_t depth = static_cast<int32_t>(MatchingDim(input_shape, 3, output_shape, 3));
+
+ // Specialize for 2x2 upsample.
+ if (!params.align_corners && !params.half_pixel_centers &&
+ params.output_height == 2 * input_height && params.output_width == 2 * input_width)
+ {
+ ResizeBilinear2x2(batches, input_height, input_width, depth, params.output_height,
+ params.output_width, input_shape, input_data, output_shape, output_data);
+ }
+ else
+ {
+ float height_scale = static_cast<float>(input_height) / params.output_height;
+ float width_scale = static_cast<float>(input_width) / params.output_width;
+ if (params.align_corners && params.output_height > 1)
+ {
+ height_scale = static_cast<float>(input_height - 1) / (params.output_height - 1);
+ }
+ if (params.align_corners && params.output_width > 1)
+ {
+ width_scale = static_cast<float>(input_width - 1) / (params.output_width - 1);
+ }
+
+ ResizeBilinearGeneric(batches, input_height, input_width, depth, params.output_height,
+ params.output_width, height_scale, width_scale, input_shape, input_data,
+ output_data, params.half_pixel_centers);
+ }
+}
+
+void ResizeBilinear(ResizeBilinearParams &params, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
+{
+ int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
+ int32_t input_height = input_shape.Dims(1);
+ int32_t input_width = input_shape.Dims(2);
+ int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+ float height_scale = (params.align_corners && params.output_height > 1)
+ ? (static_cast<float>(input_height - 1) / (params.output_height - 1))
+ : (static_cast<float>(input_height) / params.output_height);
+
+ float width_scale = (params.align_corners && params.output_width > 1)
+ ? (static_cast<float>(input_width - 1) / (params.output_width - 1))
+ : (static_cast<float>(input_width) / params.output_width);
+
+ ResizeBilinearGenericSmallChannel<uint8_t>(
+ batches, input_height, input_width, depth, params.output_height, params.output_width,
+ height_scale, width_scale, input_shape, input_data, output_data, params.half_pixel_centers);
+}
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_RESIZEBILINEAR_H__
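
ComputeInterpolationValues above reduces each output coordinate to a pair of neighbouring input indices and a fractional weight; the four corner scales in the generic kernels are products of these per-axis weights. A standalone sketch of the per-axis computation, using made-up sizes (illustration only, not part of the patch):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

// Standalone sketch of the per-axis bilinear weights: map an output coordinate
// to a (possibly half-pixel-centered) input coordinate, take the two
// neighbouring input indices, and blend them with weights that sum to 1.
int main()
{
  const bool half_pixel_centers = false;
  const float scale = 0.75f; // input_size / output_size along this axis
  const int input_size = 6;
  const float out_coord = 3.0f;

  float in = half_pixel_centers ? (out_coord + 0.5f) * scale - 0.5f : out_coord * scale;
  int lower = std::max(static_cast<int>(std::floor(in)), 0);
  int upper = std::min(static_cast<int>(std::ceil(in)), input_size - 1);
  float frac = in - lower;

  // in=2.25 lower=2 upper=3 weights=(0.75, 0.25)
  std::printf("in=%f lower=%d upper=%d weights=(%f, %f)\n", in, lower, upper, 1.0f - frac, frac);
  return 0;
}
```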
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_SPACE_TO_DEPTH_H__
+#define __NNFW_CKER_SPACE_TO_DEPTH_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename T>
+inline void SpaceToDepth(const SpaceToDepthParams &params, const Shape &unextended_input_shape,
+ const T *input_data, const Shape &unextended_output_shape, T *output_data)
+{
+ assert(unextended_input_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape input_shape = Shape::ExtendedShape(4, unextended_input_shape);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ const int output_depth = output_shape.Dims(3);
+ const int output_width = output_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+
+ const int input_depth = input_shape.Dims(3);
+ const int batch_size = input_shape.Dims(0);
+
+ // Number of contiguous values that we can copy in one iteration.
+ const int stride = params.block_size * input_depth;
+
+ for (int batch = 0; batch < batch_size; ++batch)
+ {
+ for (int out_h = 0; out_h < output_height; ++out_h)
+ {
+ T *output_ptr = output_data + Offset(output_shape, batch, out_h, 0, 0);
+ for (int offset_h = 0; offset_h < params.block_size; ++offset_h)
+ {
+ T *dst = output_ptr;
+ for (int out_w = 0; out_w < output_width; ++out_w)
+ {
+ memcpy(dst, input_data, stride * sizeof(T));
+ input_data += stride;
+ dst += output_depth;
+ }
+ output_ptr += stride;
+ }
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_SPACE_TO_DEPTH_H__
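
SpaceToDepth folds each block_size x block_size spatial patch into channels, which is why the copy stride above is block_size * input_depth. A standalone sketch of the shape relation (illustration only, not part of the patch):

```cpp
#include <cstdio>

// Standalone sketch of the SpaceToDepth shape relation for an NHWC tensor.
int main()
{
  const int block = 2;
  const int in_h = 4, in_w = 4, in_d = 3;
  const int out_h = in_h / block;
  const int out_w = in_w / block;
  const int out_d = in_d * block * block;
  std::printf("[1,%d,%d,%d] -> [1,%d,%d,%d]\n", in_h, in_w, in_d, out_h, out_w, out_d);
  return 0;
}
```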
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_SPLIT_V_H__
+#define __NNFW_CKER_SPLIT_V_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename Scalar>
+void SplitV(const SplitVParams &params, const Shape &input_shape, const Scalar *input_data,
+ std::vector<nnfw::cker::Shape> &output_shapes, Scalar *const *output_data)
+{
+ const int split_dimensions = input_shape.DimensionsCount();
+ int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+ int outputs_count = params.num_split;
+
+ int64_t split_size = 0;
+
+ for (int i = 0; i < outputs_count; i++)
+ {
+ // TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+ for (int j = 0; j < split_dimensions; j++)
+ {
+ if (j != axis)
+ {
+ MatchingDim(output_shapes[i], j, input_shape, j);
+ }
+ }
+ split_size += output_shapes[i].Dims(axis);
+ }
+
+ int64_t outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ // For all output arrays,
+ // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+ int64_t base_inner_size = 1;
+ for (int i = axis + 1; i < split_dimensions; ++i)
+ {
+ base_inner_size *= input_shape.Dims(i);
+ }
+
+ const Scalar *input_ptr = input_data;
+ int copy_size = 0;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ copy_size = output_shapes[i].Dims(axis) * base_inner_size;
+ memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+ input_ptr += copy_size;
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_SPLIT_V_H__
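
The copy loop above walks the input once, handing each output its own Dims(axis) * base_inner_size chunk per outer slice. A standalone sketch on a tiny 2x5 tensor split along axis 1 into sizes {2, 3} (illustration only, not part of the patch):

```cpp
#include <cstdio>
#include <cstring>
#include <vector>

// Standalone sketch of the SplitV copy pattern: for every outer slice, each
// output receives its own contiguous chunk of elements, in order.
int main()
{
  const int outer_size = 2;
  const std::vector<int> split_sizes = {2, 3};
  const int input[2][5] = {{0, 1, 2, 3, 4}, {5, 6, 7, 8, 9}};

  std::vector<std::vector<int>> outputs;
  for (int s : split_sizes) outputs.emplace_back(outer_size * s);

  const int *input_ptr = &input[0][0];
  for (int k = 0; k < outer_size; ++k)
  {
    for (size_t i = 0; i < split_sizes.size(); ++i)
    {
      const int copy_size = split_sizes[i]; // base_inner_size == 1 here
      std::memcpy(outputs[i].data() + k * copy_size, input_ptr, copy_size * sizeof(int));
      input_ptr += copy_size;
    }
  }
  // outputs[0] = {0, 1, 5, 6}, outputs[1] = {2, 3, 4, 7, 8, 9}
  std::printf("%d %d %d %d\n", outputs[0][0], outputs[0][1], outputs[0][2], outputs[0][3]);
  return 0;
}
```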
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_STATELESS_RANDOM_UNIFORM_H__
+#define __NNFW_CKER_STATELESS_RANDOM_UNIFORM_H__
+
+#include "cker/Types.h"
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+#include "cker/eigen/EigenSupport.h"
+
+#include "cker/operation/Helper/Tensor.h"
+#include "cker/operation/Helper/PhiloxRandom.h"
+#include "cker/operation/Helper/RandomOpCpu.h"
+#include "cker/operation/Helper/RandomDistributions.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+void GenerateKey(Tensor seed, random::PhiloxRandom::Key *out_key,
+ random::PhiloxRandom::ResultType *out_counter)
+{
+ // Grab the two seeds
+ uint32_t seed0;
+ uint32_t seed1;
+
+ const auto seed_vals = seed.flat<int32_t>();
+
+ seed0 = seed_vals(0);
+ seed1 = seed_vals(1);
+ // Scramble the seeds so that the user doesn't need to worry about which
+ // part of the seed needs to be strong.
+ (*out_key)[0] = 0x3ec8f720;
+ (*out_key)[1] = 0x02461e29;
+ (*out_counter)[0] = static_cast<uint32_t>(seed0);
+ (*out_counter)[1] = (*out_counter)[3] = 0;
+ (*out_counter)[2] = static_cast<uint32_t>(seed1);
+ const auto mix = random::PhiloxRandom(*out_counter, *out_key)();
+ (*out_key)[0] = mix[0];
+ (*out_key)[1] = mix[1];
+ (*out_counter)[0] = (*out_counter)[1] = 0;
+ (*out_counter)[2] = mix[2];
+ (*out_counter)[3] = mix[3];
+}
+
+template <typename Device, class Distribution>
+void Fill(random::PhiloxRandom random, Tensor *output)
+{
+ // Build distribution
+ typedef typename Distribution::ResultElementType T;
+
+ auto flat = output->flat<T>();
+ // Reuse the compute kernels from the stateful random ops
+ functor::FillPhiloxRandom<Device, Distribution>()(random, flat.data(), flat.size(),
+ Distribution());
+}
+
+inline void StatelessRandomUniform(const Shape &shape_shape, const int *shape_data,
+ const Shape &seed_shape, const int *seed_data,
+ const Shape &output_shape, float *output_data)
+{
+ Tensor shape_t;
+ Tensor seed_t;
+
+ shape_t.shape.ReplaceWith(shape_shape.DimensionsCount(), shape_shape.DimsData());
+ shape_t.buffer = (void *)shape_data;
+
+ seed_t.shape.ReplaceWith(seed_shape.DimensionsCount(), seed_shape.DimsData());
+ seed_t.buffer = (void *)seed_data;
+
+ Tensor output_t;
+ output_t.shape.ReplaceWith(output_shape.DimensionsCount(), output_shape.DimsData());
+ output_t.buffer = output_data;
+
+ random::PhiloxRandom::Key key;
+ random::PhiloxRandom::ResultType counter;
+
+ GenerateKey(seed_t, &key, &counter);
+
+ Fill<Eigen::ThreadPoolDevice, random::UniformDistribution<random::PhiloxRandom, float>>(
+ random::PhiloxRandom(counter, key), &output_t);
+}
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_STATELESS_RANDOM_UNIFORM_H__
#include <ruy/context.h>
#include "cker/Types.h"
-namespace
-{
-const int kDefaultNumThreadpoolThreads = 4;
-}
-
namespace nnfw
{
namespace cker
namespace ruy_support
{
-struct RuyContext
-{
-public:
- RuyContext() : ruy_context_(new ruy::Context)
- {
- SetMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
-#ifdef USE_RUY_GEMV
- ruy_context_->cache_policy = ruy::kCacheLHSOnNarrowMul;
-#endif
- };
-
- ruy::Context *ruy_context() const { return ruy_context_.get(); }
-
- static inline RuyContext &GetRuyContext()
- {
- static thread_local RuyContext instance;
- return instance;
- }
-
- void SetMaxNumThreads(int max_num_threads)
- {
- const int target_num_threads =
- max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
- ruy_context_->max_num_threads = target_num_threads;
- }
-
-private:
- const std::unique_ptr<ruy::Context> ruy_context_;
-};
-
-inline ruy::Context *GetRuyContext()
-{
- auto &ctx = RuyContext::GetRuyContext();
- return ctx.ruy_context();
-}
-
template <typename Scalar, typename DataPointer>
void MakeRuyMatrix(const MatrixParams<Scalar> ¶ms, DataPointer data_ptr,
ruy::Matrix<Scalar> *dst)
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.7.0'
+release = '1.8.0'
# -- General configuration ---------------------------------------------------
```
$ sudo apt-get install cmake libboost-all-dev
-```
+```
If your Linux system does not have the basic development configuration, you will need to install more packages. A list of all the packages needed to configure the development environment can be found in the https://github.com/Samsung/ONE/blob/master/infra/docker/Dockerfile.1804 file.
scons \
software-properties-common \
unzip \
-wget
+wget
$ mkdir /tmp/gtest
$ cd /tmp/gtest
```
$ git clone https://github.com/Samsung/ONE.git one
$ cd one
-$ cp -n Makefile.template Makefile; make install
+$ make -f Makefile.template install
```
Unfortunately, the debug build on the x86_64 architecture currently fails with an error. To solve the problem, you must use gcc version 9 or higher. Another workaround is to do a release build rather than a debug build. This is not suitable for debugging during development, but it is enough to check that the runtime works. To build the runtime in release mode, add the environment variable `BUILD_TYPE=release` to the build command as follows.
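
For example, a release build and install can be started like this (a sketch based on the `Makefile.template` command above; adjust it to your own build flow):

```
$ BUILD_TYPE=release make -f Makefile.template install
```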
│ │ ├── NeuralNetworksEx.h
│ │ ├── NeuralNetworksExtensions.h
│ │ ├── NeuralNetworks.h
-│ │ ├── nnfw_dev.h
+│ │ ├── nnfw_experimental.h
│ │ └── nnfw.h
│ └── onert
│ ├── backend
```
3) (Optional) Assign a specific backend to operations
``` c
- // Use acl_neon backend for CONV_2D and acl_cl for otherwise.
- // Note that defalut backend is acl_cl
+ // Use the 'acl_neon' backend for CONV_2D and the 'cpu' backend otherwise.
+ // Note that the default backend is 'cpu'.
nnfw_set_op_backend(session, "CONV_2D", "acl_neon");
```
Here is an example of using Makefile.
```bash
-cp -n Makefile.template Makefile
-
TARGET_OS=android \
CROSS_BUILD=1 \
NDK_DIR=/path/android-tools/r20/ndk \
EXT_ACL_FOLDER=/path/arm_compute-v19.11.1-bin-android/lib/android-arm64-v8a-neon-cl \
-make install
+make -f Makefile.template install
```
SHAPE | O | O | O
SIN | O | O | O
SKIP_GRAM | O | |
-SLICE | O | O |
+SLICE | O | O | O
SOFTMAX | O | O | O
SPACE_TO_BATCH_ND | O | O | O
SPACE_TO_DEPTH | O | O | O
+++ /dev/null
-## Feature Highlights
-
-- **ONE** Compiler
- - Compiler supports more operations
- - New command line interface for user interface consistancy
-- **ONE** Runtime
- - Runtime CPU backend supports more operations
- - Runtime CPU backend supports more quant8 operations
- - API changes
- - New optimization
-
-## ONE Compiler
-
-### Compiler supports more operations
-
-- MatrixDiag, MatrixSetDiag, ReverseSequence, ReverseV2, SegmentSum, SelectV2, SparseToDense, Where
-
-### New command line interface for user interface consistancy
-
-- one-import: imports conventional model files to circle
- - one-import-tf: imports TensorFlow model to circle
- - one-import-tflite: imports TensorFlow lite model to circle
-- one-optimize: circle optimize command
-- one-quantize: circle quantize command
- - supports float32 to uint8, layer wise (for Conv series)
-- one-pack: package command
-- one-prepare-venv: prepares python virtual environment for importing TensorFlow model
-- one-codegen: backend(if available) code generator
-
-## ONE Runtime
-
-### Runtime CPU backend supports more operations
-
-- LogSoftmax, SpaceToBatchND
-
-### Runtime CPU backend supports more quant8 operations
-
-- Logistic, Mul, Tanh, SpaceToBatchND, Transpose, Sub, Max, Min, Less, Greater, GreaterEqual, LessEqual, Equal, NotEqual
-
-### API changes
-
-- Introduce basic asynchronous execution API
-
-### New optimization
-
-- Remove dynamic tensor overhead from static models
--- /dev/null
+# Release Note 1.8.0
+
+## Feature Highlights
+
+- **ONE** Compiler
+ - Support new command line interface
+
+- **ONE** Runtime
+ - CPU backend supports 7 more operations
+ - CPU backend supports 9 more quant8 operations
+
+## ONE Compiler
+
+### New command line interface for user interface consistency
+
+- `one-import-bcq` : import BCQ(Binary coding quantized) TensorFlow model
+- Commands now support `--version` option to show version number
+
+### Changes
+
+- Experimental support for TensorFlow 2.x has been updated to 2.3.0 (TensorFlow 1.13.2 is our officially supported version)
+- Support more operators in luci-interpreter
+- Enhancing one-quantizer
+
+## ONE Runtime
+
+### Rename headers
+
+- Rename `nnfw_dev.h` to `nnfw_experimental.h`
+
+### Optimization
+
+- Remove copies for model input/outputs whenever possible
+
+### Support CPU backend operations
+
+- BatchToSpaceND, L2Normalization, ReLU6, ResizeBilinear, SpaceToDepth, SplitV, StatelessRandomUniform
+
+### Support CPU backend quant8 operations
+
+- BatchToSpaceND, L2Normalization, Pad, PadV2, ResizeBilinear, Slice, Quantize, SpaceToDepth, Sum
+
# API
+
+## Runtime Layered Architecture
+
+Here is a figure of the runtime layered architecture.
+
+![Layered Architecture](api-layered-arch.png)
+
+There are three parts - Frontend, Core and Backend. Core works with the Frontend and Backend API. Frontend takes user inputs (neural network models) and Backend does the actual computation.
+
+## Frontend API
+
+Frontend API is about creating/loading a model and running it.
+
+Runtime supports two (frontend) APIs - NN API and NNFW API.
+
+### NN API
+
+NN API stands for Android Neural Networks API. It is part of the Android Open Source Project, and we provide a binding between NN API and One Runtime.
+
+For usage, refer to [Howto : NN API](../howto/how-to-use-nnapi-binding.md).
+
+### NNFW API
+
+NNFW API is ONE's own API. It supports loading models from NN Packages. As it is our own API, it can expose most of the functionality that One Runtime offers. For example, it provides functions for execution with multiple backends.
+
+For usage, refer to [Howto : NNFW API](../howto/how-to-use-nnfw-api.md).
+
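+For a rough idea of the call flow, here is a minimal sketch of running one inference through NNFW API (error-status checks are omitted; the nnpackage path, tensor sizes and tensor types below are placeholders you would replace for a real model):
+
+```c
+#include <nnfw.h>
+
+#define INPUT_SIZE 128  /* placeholder: use your model's input element count */
+#define OUTPUT_SIZE 10  /* placeholder: use your model's output element count */
+
+int main(void)
+{
+  nnfw_session *session = NULL;
+  nnfw_create_session(&session);
+
+  /* Load a model from an NN Package directory (placeholder path) */
+  nnfw_load_model_from_file(session, "path/to/nnpackage");
+
+  /* Optionally choose backends, then compile the model */
+  nnfw_set_available_backends(session, "cpu");
+  nnfw_prepare(session);
+
+  /* Bind user buffers and run one inference */
+  float input[INPUT_SIZE] = {0};
+  float output[OUTPUT_SIZE] = {0};
+  nnfw_set_input(session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input));
+  nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, output, sizeof(output));
+  nnfw_run(session);
+
+  nnfw_close_session(session);
+  return 0;
+}
+```
+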
+## Backend API
+
+Backend API is defined by One Runtime.
+
+Backend API is about the actual computation of operations and memory management for operands. In order to allow different kinds of computation units or computation libraries, One Runtime defines Backend API to support user-defined operation kernels and memory managers. It consists of a number of C++ headers, which are subject to change.
+
+For detailed descriptions, refer to [Backend API](../runtime/backend-api.md).
With generated tensors and kernels, the compiler creates executor objects. There are 3 types of executors supported - Linear, Dataflow, and Parallel. Linear executor is the default executor, and Dataflow Executor and Parallel Executor are experimental.
-For more about executors, please refer to [Executors](./executors.md) document.
+For more about executors, please refer to the [Executors](executors.md) document.
### Module `exec`
Backends are plugins and they are loaded dynamically (via `dlopen`). So this module is a set of interface classes for backend implementation. `compiler` can compile with a variety of backends without knowing the specific backend implementation.
-Backend interface classes are mostly about memory management and kernel generation. For more, please refer to [Backend API](./backend-api.md) document.
+Backend interface classes are mostly about memory management and kernel generation. For more, please refer to the [Backend API](backend-api.md) document.
![Add-3Conv model](heterogeneous-execution-add-3-conv-model.png)
-Say we have 3 backends that are based on CPU, GPU and NPU(Neural Processing Unit) respectively. After executing Add, 3 Conv2D operations are ready to run. We may utilize those backends with [Parallel Executor (experimental)](./executors.md#parallel-executor-experimental). For this case we may get performance gain regardless of kernels' speed as those are run in parallel independently.
+Say we have 3 backends that are based on CPU, GPU and NPU (Neural Processing Unit) respectively. After executing Add, 3 Conv2D operations are ready to run. We may utilize those backends with [Parallel Executor (experimental)](executors.md#parallel-executor-experimental). In this case we may get a performance gain regardless of the kernels' speed, as they run independently in parallel.
## Graph Transformation
-Unfortunately it is not that simple to get performance gain. As each backend has its own memory management module, a copy must be done between backend boundaries. Plus, it may require layout changes so "Permute" operations are added from `PermutationInsertionPass`. This process is done from [Lowering](./core.md#1-lowering) phase of compilation.
+Unfortunately it is not that simple to get a performance gain. As each backend has its own memory management module, a copy must be done across backend boundaries. Plus, layout changes may be required, so "Permute" operations are added by `PermutationInsertionPass`. This process is done in the [Lowering](core.md#1-lowering) phase of compilation.
Here is an example of that. Let's say we have assigned different backends for Add and Conv2D. So a Permute operation is inserted between them.
nnas_include(OptionTools)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v19.11.1.tar.gz)
+ set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v20.05.tar.gz)
ExternalSource_Download(ARMCOMPUTE ${ARMCOMPUTE_URL})
set(ARMComputeSource_DIR ${ARMCOMPUTE_SOURCE_DIR} PARENT_SCOPE)
# NOTE TensorFlow 1.12 downloads farmhash from the following URL
# TensorFlow 1.13.1 downloads farmhash from the following URL
- # TensorFlow 2.3-rc0 downloads farmhash from the following URL
+ # TensorFlow 2.3.0 downloads farmhash from the following URL
envoption(FARMHASH_1_12_URL https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz)
ExternalSource_Download(FARMHASH ${FARMHASH_1_12_URL})
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS/build
INSTALL_DIR ${EXT_OVERLAY_DIR}
BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
- IDENTIFIER "1.10-fix1"
+ IDENTIFIER "1.10-fix2"
+ EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
PKG_NAME "FLATBUFFERS")
endfunction(_FlatBuffers_build)
find_path(HDF5_CONFIG_DIR "hdf5-config.cmake"
PATHS ${EXT_OVERLAY_DIR}
PATH_SUFFIXES
+ cmake
share/cmake
share/cmake/hdf5
cmake/hdf5
--- /dev/null
+function(_Pybind11_import)
+ nnas_find_package(Pybind11Source QUIET)
+
+ if(NOT Pybind11Source_FOUND)
+ set(Pybind11_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT Pybind11Source_FOUND)
+
+ nnas_include(ExternalBuildTools)
+ ExternalBuild_CMake(CMAKE_DIR ${Pybind11Source_DIR}
+ BUILD_DIR ${CMAKE_BINARY_DIR}/externals/PYBIND11/build
+ INSTALL_DIR ${EXT_OVERLAY_DIR}
+ IDENTIFIER "2.5.0"
+ PKG_NAME "PYBIND11"
+ EXTRA_OPTS "-DPYBIND11_TEST:BOOL=OFF")
+
+ find_path(Pybind11_INCLUDE_DIRS NAMES pybind11.h PATHS ${EXT_OVERLAY_DIR} PATH_SUFFIXES include/pybind11)
+
+ set(Pybind11_FOUND TRUE PARENT_SCOPE)
+endfunction(_Pybind11_import)
+
+_Pybind11_import()
--- /dev/null
+function(_Pybind11Source_import)
+ if(NOT DOWNLOAD_PYBIND11)
+ set(Pybind11Source_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_PYBIND11)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(PYBIND11_URL https://github.com/pybind/pybind11/archive/v2.5.0.tar.gz)
+
+ ExternalSource_Download(PYBIND11 ${PYBIND11_URL})
+
+ set(Pybind11Source_DIR ${PYBIND11_SOURCE_DIR} PARENT_SCOPE)
+ set(Pybind11Source_FOUND TRUE PARENT_SCOPE)
+endfunction(_Pybind11Source_import)
+
+_Pybind11Source_import()
--- /dev/null
+function(_TensorFlowEigenSource_import)
+ if(NOT DOWNLOAD_EIGEN)
+ set(TensorFlowEigenSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_EIGEN)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.3.0.
+ # See tensorflow/tensorflow/workspace.bzl.
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
+ envoption(TENSORFLOW_2_3_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz)
+
+ ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.3.0-EIGEN ${TENSORFLOW_2_3_0_EIGEN_URL})
+
+ set(TensorFlowEigenSource_DIR ${EIGEN_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowEigenSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowEigenSource_import)
+
+_TensorFlowEigenSource_import()
--- /dev/null
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
--- /dev/null
+function(_TensorFlowSource_import)
+ if(NOT DOWNLOAD_TENSORFLOW)
+ set(TensorFlowSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_TENSORFLOW)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(TENSORFLOW_2_3_0_URL https://github.com/tensorflow/tensorflow/archive/v2.3.0.tar.gz)
+
+ ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.3.0 ${TENSORFLOW_2_3_0_URL})
+
+ set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowSource_import)
+
+_TensorFlowSource_import()
--- /dev/null
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
FROM ubuntu:16.04
ARG UBUNTU_MIRROR
-ENV http_proxy $http_proxy
-ENV https_proxy $https_proxy
RUN if [ -n "$http_proxy" ] ; then echo "Acquire::http::proxy \"${http_proxy}\";" >> /etc/apt/apt.conf ; fi
RUN if [ -n "$https_proxy" ] ; then echo "Acquire::https::proxy \"${https_proxy}\";" >> /etc/apt/apt.conf ; fi
# Additional tools
RUN apt-get update && apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint
+RUN pip3 install --upgrade pip
RUN pip3 install yapf==0.22.0 numpy
# Install google test (source)
FROM ubuntu:18.04
ARG UBUNTU_MIRROR
-ENV http_proxy $http_proxy
-ENV https_proxy $https_proxy
-
-RUN if [ -n "$http_proxy" ] ; then echo "Acquire::http::proxy \"${http_proxy}\";" >> /etc/apt/apt.conf ; fi
-RUN if [ -n "$https_proxy" ] ; then echo "Acquire::https::proxy \"${https_proxy}\";" >> /etc/apt/apt.conf ; fi
-RUN if [ -n "$UBUNTU_MIRROR" ] ; then sed "s/archive.ubuntu.com/${UBUNTU_MIRROR}/g" -i /etc/apt/sources.list ; fi
# Install 'add-apt-repository'
RUN apt-get update && apt-get -qqy install software-properties-common
# Additional tools
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint
+RUN pip3 install --upgrade pip
RUN pip3 install yapf==0.22.0 numpy
# Install google test (source)
option(DOWNLOAD_PYTORCH "Download Pytorch source" ON)
option(DOWNLOAD_ONNX "Download ONNX source" ON)
option(DOWNLOAD_ABSEIL "Download Abseil-cpp source" ON)
+option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
option(DOWNLOAD_GTEST "Download Google Test source" ON)
option(BUILD_GTEST "Build Google Test from the downloaded source" ON)
oops pepper-assert \
hermes hermes-std \
loco locop locomotiv logo-core logo \
-foder souschef arser \
+foder souschef arser vconone \
safemain mio-circle mio-tflite \
tflite2circle \
luci \
OFF)
option(BUILD_RUNTIME_NNFW_API_TEST "Build Runtime NNFW API Tests" ON)
option(BUILD_TFLITE_RUN "Build tflite-run" ON)
-option(BUILD_TFLITE_RUN_2_2_0 "Build tflite-run 2.2.0" OFF)
+option(BUILD_TFLITE_VANILLA_RUN "Build tflite-vanilla-run" OFF)
option(BUILD_TFLITE_BENCHMARK_MODEL "Build tflite benchmark model" OFF)
option(BUILD_NNAPI_TEST "Build nnapi_test" ON)
option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
option(DOWNLOAD_RUY "Download ruy source" ON)
option(BUILD_BOOST "Build boost source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON)
-option(BUILD_TENSORFLOW_LITE_2_2_0 "Build TensorFlow Lite from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE_2_3_0 "Build TensorFlow Lite 2.3.0 from the downloaded source" OFF)
option(BUILD_GTEST "Download and build Google Test" ON)
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" ON)
option(BUILD_RUY "Build ruy library from the downloaded source" ON)
function(_Eigen_import)
- nnas_find_package(TensorFlowEigenSource-2.3.0-rc0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
if(NOT TensorFlowEigenSource_FOUND)
set(Eigen_FOUND FALSE PARENT_SCOPE)
+++ /dev/null
-if(BUILD_TENSORFLOW_LITE_2_2_0)
- macro(return_unless VAR)
- if(NOT ${VAR})
- message("${VAR} NOT TRUE")
- set(TensorFlowLite_2_2_0_FOUND PARENT_SCOPE)
- return()
- endif(NOT ${VAR})
- endmacro(return_unless)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.2.0/tensorflow/lite/tools/make/Makefile
-
- set(absl_url "https://github.com/abseil/abseil-cpp/archive/43ef2148c0936ebf7cb4be6b19927a9d9d145b8f.tar.gz")
- ExternalSource_Download("tflite220_Absl" ${absl_url})
- set(TFLite220AbslSource_DIR "${tflite220_Absl_SOURCE_DIR}")
- if (NOT TFLite220AbslSource_DIR STREQUAL "")
- set(TFLite220AbslSource_FOUND TRUE)
- endif()
- return_unless(TFLite220AbslSource_FOUND)
-
- set(eigen_url "https://gitlab.com/libeigen/eigen/-/archive/52a2fbbb008a47c5e3fb8ac1c65c2feecb0c511c/eigen-52a2fbbb008a47c5e3fb8ac1c65c2feecb0c511c.tar.gz")
- ExternalSource_Download("tflite220_Eigen" ${eigen_url})
- set(TFLite220EigenSource_DIR "${tflite220_Eigen_SOURCE_DIR}")
- if (NOT TFLite220EigenSource_DIR STREQUAL "")
- set(TFLite220EigenSource_FOUND TRUE)
- endif()
- return_unless(TFLite220EigenSource_FOUND)
-
- set(farmhash_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz")
- ExternalSource_Download("tflite220_Farmhash" ${farmhash_url})
- set(TFLite220FarmhashSource_DIR "${tflite220_Farmhash_SOURCE_DIR}")
- if (NOT TFLite220FarmhashSource_DIR STREQUAL "")
- set(TFLite220FarmhashSource_FOUND TRUE)
- endif()
- return_unless(TFLite220FarmhashSource_FOUND)
-
- set(fft2d_url "https://storage.googleapis.com/mirror.tensorflow.org/www.kurims.kyoto-u.ac.jp/~ooura/fft2d.tgz")
- ExternalSource_Download("tflite220_FFT2D" ${fft2d_url})
- set(TFLite220FFT2DSource_DIR "${tflite220_FFT2D_SOURCE_DIR}")
- if (NOT TFLite220FFT2DSource_DIR STREQUAL "")
- set(TFLite220FFT2DSource_FOUND TRUE)
- endif()
- return_unless(TFLite220FFT2DSource_FOUND)
-
- set(flatbuffers_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.11.0.tar.gz")
- ExternalSource_Download("tflite220_FlatBuffers" ${flatbuffers_url})
- set(TFLite220FlatBuffersSource_DIR "${tflite220_FlatBuffers_SOURCE_DIR}")
- if (NOT TFLite220FlatBuffersSource_DIR STREQUAL "")
- set(TFLite220FlatBuffersSource_FOUND TRUE)
- endif()
- return_unless(TFLite220FlatBuffersSource_FOUND)
-
- set(fp16_url "https://github.com/Maratyszcza/FP16/archive/febbb1c163726b5db24bed55cc9dc42529068997.zip")
- ExternalSource_Download("tflite220_FP16" ${fp16_url})
- set(TFLite220FP16Source_DIR "${tflite220_FP16_SOURCE_DIR}")
- if (NOT TFLite220FP16Source_DIR STREQUAL "")
- set(TFLite220FP16Source_FOUND TRUE)
- endif()
- return_unless(TFLite220FP16Source_FOUND)
-
- set(gemmlowp_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip")
- ExternalSource_Download("tflite220_GEMMLowp" ${gemmlowp_url})
- set(TFLite220GEMMLowpSource_DIR "${tflite220_GEMMLowp_SOURCE_DIR}")
- if (NOT TFLite220GEMMLowpSource_DIR STREQUAL "")
- set(TFLite220GEMMLowpSource_FOUND TRUE)
- endif()
- return_unless(TFLite220GEMMLowpSource_FOUND)
-
- set(neon2sse_url "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip")
- ExternalSource_Download("tflite220_NEON2SSE" ${neon2sse_url})
- set(TFLite220NEON2SSESource_DIR "${tflite220_NEON2SSE_SOURCE_DIR}")
- if (NOT TFLite220NEON2SSESource_DIR STREQUAL "")
- set(TFLite220NEON2SSESource_FOUND TRUE)
- endif()
- return_unless(TFLite220NEON2SSESource_FOUND)
-
- set(tensorflow_url "https://github.com/tensorflow/tensorflow/archive/v2.2.0.tar.gz")
- ExternalSource_Download("tflite220_TensorFlow" ${tensorflow_url})
- set(TFLite220TensorFlowSource_DIR "${tflite220_TensorFlow_SOURCE_DIR}")
- if (NOT TFLite220TensorFlowSource_DIR STREQUAL "")
- set(TFLite220TensorFlowSource_FOUND TRUE)
- endif()
- return_unless(TFLite220TensorFlowSource_FOUND)
-
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite-2.2.0" tflite-2.2.0)
-
- set(TensorFlowLite_2_2_0_FOUND TRUE)
- return()
-endif()
-# Reference: https://github.com/tensorflow/tensorflow/blob/v2.2.0/tensorflow/lite/tools/make/Makefile
+# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
#
-# Tensorflow Lite library 2.2.0
+# Tensorflow Lite library 2.3.0
#
-set(TENSORFLOW_LITE_BASE ${TFLite220TensorFlowSource_DIR}/tensorflow/lite)
+set(TENSORFLOW_LITE_BASE ${TFLiteVanillaTensorFlowSource_DIR}/tensorflow/lite)
file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
"${TENSORFLOW_LITE_BASE}/*.cc"
list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
-file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc"
- "${TENSORFLOW_LITE_BASE}/experimental/ruy/*.cc")
+file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
# externals
-list(APPEND TFLITE_SRCS "${TFLite220FarmhashSource_DIR}/src/farmhash.cc")
-list(APPEND TFLITE_SRCS "${TFLite220FFT2DSource_DIR}/fftsg.c")
-list(APPEND TFLITE_SRCS "${TFLite220FFT2DSource_DIR}/fftsg2d.c")
-list(APPEND TFLITE_SRCS "${TFLite220FlatBuffersSource_DIR}/src/util.cpp")
+list(APPEND TFLITE_SRCS "${TFLiteVanillaFarmhashSource_DIR}/src/farmhash.cc")
+list(APPEND TFLITE_SRCS "${TFLiteVanillaFFT2DSource_DIR}/fftsg.c")
+list(APPEND TFLITE_SRCS "${TFLiteVanillaFFT2DSource_DIR}/fftsg2d.c")
+list(APPEND TFLITE_SRCS "${TFLiteVanillaFlatBuffersSource_DIR}/src/util.cpp")
# externals - absl
-file(GLOB_RECURSE ABSL_SRCS "${TFLite220AbslSource_DIR}/absl/*.cc")
-file(GLOB_RECURSE ABSL_EXCLS "${TFLite220AbslSource_DIR}/absl/*test*.cc"
- "${TFLite220AbslSource_DIR}/absl/*benchmark*.cc"
- "${TFLite220AbslSource_DIR}/absl/synchronization/*.cc"
- "${TFLite220AbslSource_DIR}/absl/debugging/*.cc"
- "${TFLite220AbslSource_DIR}/absl/hash/*.cc"
- "${TFLite220AbslSource_DIR}/absl/flags/*.cc")
+file(GLOB_RECURSE ABSL_SRCS "${TFLiteVanillaAbslSource_DIR}/absl/*.cc")
+file(GLOB_RECURSE ABSL_EXCLS "${TFLiteVanillaAbslSource_DIR}/absl/*test*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/*benchmark*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/synchronization/*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/debugging/*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/hash/*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/flags/*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/random/*.cc")
list(REMOVE_ITEM ABSL_SRCS ${ABSL_EXCLS})
list(APPEND TFLITE_SRCS ${ABSL_SRCS})
+# externals - ruy
+file(GLOB RUY_SRCS "${TFLiteVanillaRuySource_DIR}/ruy/*.cc")
+file(GLOB_RECURSE RUY_EXCLS "${TFLiteVanillaRuySource_DIR}/ruy/*test*.cc"
+ "${TFLiteVanillaRuySource_DIR}/ruy/*benchmark*.cc"
+ "${TFLiteVanillaRuySource_DIR}/ruy/*example*.cc")
+list(REMOVE_ITEM RUY_SRCS ${RUY_EXCLS})
+# Temporary fix for ruy compilation error.
+# TODO(b/158800055): Remove this hack once the ruy version is correctly bumped.
+list(REMOVE_ITEM RUY_SRCS "${TFLiteVanillaRuySource_DIR}/ruy/prepare_packed_matrices.cc")
+list(APPEND TFLITE_SRCS ${RUY_SRCS})
+
+
# Build with mmap? true
-# caution: v2.2.0's Makefile has wrong code on this part. This is fixed on master branch.
+# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
set(BUILD_WITH_MMAP TRUE)
if(${BUILD_WITH_MMAP})
list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
# include headers
-list(APPEND TFLITE_INCLUDES "${TFLite220TensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLite220EigenSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLite220AbslSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLite220GEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLite220NEON2SSESource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLite220FarmhashSource_DIR}/src")
-list(APPEND TFLITE_INCLUDES "${TFLite220FlatBuffersSource_DIR}/include")
-list(APPEND TFLITE_INCLUDES "${TFLite220FP16Source_DIR}/include")
-
-add_library(tensorflow-lite-2.2.0 STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite-2.2.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_compile_definitions(tensorflow-lite-2.2.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV")
-set_property(TARGET tensorflow-lite-2.2.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(tensorflow-lite-2.2.0 eigen ${LIB_PTHREAD} dl)
-if(${BUILD_WITH_NNAPI})
- target_link_libraries(tensorflow-lite-2.2.0 rt)
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaTensorFlowSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaEigenSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaAbslSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaGEMMLowpSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaNEON2SSESource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFarmhashSource_DIR}/src")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFlatBuffersSource_DIR}/include")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFP16Source_DIR}/include")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaRuySource_DIR}")
+
+add_library(tensorflow-lite-2.3.0 STATIC ${TFLITE_SRCS})
+target_include_directories(tensorflow-lite-2.3.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
+target_compile_definitions(tensorflow-lite-2.3.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV")
+set_property(TARGET tensorflow-lite-2.3.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(tensorflow-lite-2.3.0 eigen ${LIB_PTHREAD} dl)
+if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
+ target_link_libraries(tensorflow-lite-2.3.0 rt)
endif()
if(ANDROID)
- target_link_libraries(tensorflow-lite-2.2.0 log)
- target_include_directories(tensorflow-lite-2.2.0 PUBLIC "${NDK_DIR}/..")
+ target_link_libraries(tensorflow-lite-2.3.0 log)
+ target_include_directories(tensorflow-lite-2.3.0 PUBLIC "${NDK_DIR}/..")
endif()
--- /dev/null
+if(BUILD_TENSORFLOW_LITE_2_3_0)
+ macro(return_unless VAR)
+ if(NOT ${VAR})
+ message("${VAR} NOT TRUE")
+ set(TensorFlowLite_2_3_0_FOUND PARENT_SCOPE)
+ return()
+ endif(NOT ${VAR})
+ endmacro(return_unless)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
+
+ set(absl_url "https://github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_Absl" ${absl_url})
+ set(TFLiteVanillaAbslSource_DIR "${TFLiteVanilla_Absl_SOURCE_DIR}")
+ if (NOT TFLiteVanillaAbslSource_DIR STREQUAL "")
+ set(TFLiteVanillaAbslSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaAbslSource_FOUND)
+
+ set(eigen_url "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_Eigen" ${eigen_url})
+ set(TFLiteVanillaEigenSource_DIR "${TFLiteVanilla_Eigen_SOURCE_DIR}")
+ if (NOT TFLiteVanillaEigenSource_DIR STREQUAL "")
+ set(TFLiteVanillaEigenSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaEigenSource_FOUND)
+
+ set(farmhash_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_Farmhash" ${farmhash_url})
+ set(TFLiteVanillaFarmhashSource_DIR "${TFLiteVanilla_Farmhash_SOURCE_DIR}")
+ if (NOT TFLiteVanillaFarmhashSource_DIR STREQUAL "")
+ set(TFLiteVanillaFarmhashSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaFarmhashSource_FOUND)
+
+ set(fft2d_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/petewarden/OouraFFT/archive/v1.0.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_FFT2D" ${fft2d_url})
+ set(TFLiteVanillaFFT2DSource_DIR "${TFLiteVanilla_FFT2D_SOURCE_DIR}")
+ if (NOT TFLiteVanillaFFT2DSource_DIR STREQUAL "")
+ set(TFLiteVanillaFFT2DSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaFFT2DSource_FOUND)
+
+ set(flatbuffers_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.12.0.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_FlatBuffers" ${flatbuffers_url})
+ set(TFLiteVanillaFlatBuffersSource_DIR "${TFLiteVanilla_FlatBuffers_SOURCE_DIR}")
+ if (NOT TFLiteVanillaFlatBuffersSource_DIR STREQUAL "")
+ set(TFLiteVanillaFlatBuffersSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaFlatBuffersSource_FOUND)
+
+ set(fp16_url "https://github.com/Maratyszcza/FP16/archive/4dfe081cf6bcd15db339cf2680b9281b8451eeb3.zip")
+ ExternalSource_Download("TFLiteVanilla_FP16" ${fp16_url})
+ set(TFLiteVanillaFP16Source_DIR "${TFLiteVanilla_FP16_SOURCE_DIR}")
+ if (NOT TFLiteVanillaFP16Source_DIR STREQUAL "")
+ set(TFLiteVanillaFP16Source_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaFP16Source_FOUND)
+
+ set(gemmlowp_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip")
+ ExternalSource_Download("TFLiteVanilla_GEMMLowp" ${gemmlowp_url})
+ set(TFLiteVanillaGEMMLowpSource_DIR "${TFLiteVanilla_GEMMLowp_SOURCE_DIR}")
+ if (NOT TFLiteVanillaGEMMLowpSource_DIR STREQUAL "")
+ set(TFLiteVanillaGEMMLowpSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaGEMMLowpSource_FOUND)
+
+ set(neon2sse_url "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_NEON2SSE" ${neon2sse_url})
+ set(TFLiteVanillaNEON2SSESource_DIR "${TFLiteVanilla_NEON2SSE_SOURCE_DIR}")
+ if (NOT TFLiteVanillaNEON2SSESource_DIR STREQUAL "")
+ set(TFLiteVanillaNEON2SSESource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaNEON2SSESource_FOUND)
+
+ set(tensorflow_url "https://github.com/tensorflow/tensorflow/archive/v2.3.0.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_TensorFlow" ${tensorflow_url})
+ set(TFLiteVanillaTensorFlowSource_DIR "${TFLiteVanilla_TensorFlow_SOURCE_DIR}")
+ if (NOT TFLiteVanillaTensorFlowSource_DIR STREQUAL "")
+ set(TFLiteVanillaTensorFlowSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaTensorFlowSource_FOUND)
+
+ set(ruy_url "https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip")
+ ExternalSource_Download("TFLiteVanilla_Ruy" ${ruy_url})
+ set(TFLiteVanillaRuySource_DIR "${TFLiteVanilla_Ruy_SOURCE_DIR}")
+ if (NOT TFLiteVanillaRuySource_DIR STREQUAL "")
+ set(TFLiteVanillaRuySource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaRuySource_FOUND)
+
+ nnas_include(ExternalProjectTools)
+ add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite-2.3.0" tflite-2.3.0)
+
+ set(TensorFlowLite_2_3_0_FOUND TRUE)
+ return()
+endif()
[profile.tizen]
user=obs_viewer
obs = obs.tizen
-repos = repo.tizen_base,repo.tizen_mobile
+repos = repo.tizen_one,repo.tizen_base,repo.tizen_mobile
buildroot = /home/GBS-ROOT/
[obs.tizen]
url = http://download.tizen.org/snapshots/tizen/unified/latest/repos/standard/packages/
[repo.tizen_base]
-url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/
+url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/
+[repo.tizen_one]
+url = http://nnfw.mooo.com/archive/tizen/
# Invoke "preset_configure" function that the preset provides
preset_configure
-NPROC=$(cat /proc/cpuinfo | grep -c processor)
+NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+echo "[BUILD] \"make\" with -j${NPROC} option. You can specify the number of jobs by defining NPROC"
cmake --build . -- -j$((NPROC/2)) all
cmake --build . -- install
# Install NN Package tools
REQUIRED_UNITS+=("souschef")
REQUIRED_UNITS+=("safemain")
REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
# Hermes Logging Framework
REQUIRED_UNITS+=("hermes" "hermes-std")
# loco IR and related utilities
REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
REQUIRED_UNITS+=("record-minmax" "circle-quantizer")
REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
# TODO Use "nncc configure" and "nncc build"
cmake \
-DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
-DCMAKE_BUILD_TYPE=release \
-DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
${EXTRA_OPTIONS[@]} \
"${NNAS_PROJECT_PATH}/infra/nncc"
}
# Install tf2nnpkg
install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
-
- # Create python virtual enviornment
- python3 -m venv "${NNAS_INSTALL_PREFIX}/bin/venv"
-
- # Install tensorflow
- source "${NNAS_INSTALL_PREFIX}/bin/venv/bin/activate"
- python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install -U pip setuptools
- python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install tensorflow-cpu==2.3.0rc0
}
--- /dev/null
+#!/bin/bash
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "foder")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer")
+ REQUIRED_UNITS+=("one-cmds")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DTF2NNPKG_FOR_WINDOWS=ON \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20200630" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # Though you have to install tensorflow to run 'tf2tfliteV2',
+ # tensorflow can't be installed in MinGW. Instead, install tensorflow
+ # from a Windows native CMD (run as administrator) into a python virtual environment,
+ # and then copy that environment to "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
usage()
{
echo "Convert TensorFlow model to nnpackage."
- echo "Usage: tf2nnpkg --info <path/to/info> --graphdef <path/to/pb> [OPTION] -o <path/to/nnpkg/directory>"
- exit 0
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
}
+TF_INTERFACE="--v1"
+
# Parse command-line arguments
#
while [ "$#" -ne 0 ]; do
export OUTPUT_DIR="$2"
shift 2
;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
*)
echo "${CUR}"
shift
INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
# generate tflite file
-python "${ROOT}/bin/tf2tfliteV2.py" --v2 --input_path ${GRAPHDEF_FILE} \
---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
---input_arrays ${INPUT} --output_arrays ${OUTPUT} || \
-python "${ROOT}/bin/tf2tfliteV2.py" --v1 --input_path ${GRAPHDEF_FILE} \
+python "${ROOT}/bin/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${GRAPHDEF_FILE} \
--output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
--input_arrays ${INPUT} --input_shapes ${INPUT_SHAPES} \
--output_arrays ${OUTPUT}
--- /dev/null
+#!/bin/bash
+#
+# STEP 1
+# Download latest TCM tool from
+# https://github.sec.samsung.net/RS-TCM/tca-standalone/releases/download/v0.0.8/tca-standalone-0.0.8.jar
+#
+# STEP 2
+# Create symbolic link `./src` for source directory to be analyzed which has `.ahub` configuration.
+#
+# STEP 3
+# run this `build-tcm.sh` script.
+#
+# See the following link for additional details.
+# https://github.sec.samsung.net/RS-TCM/tca-standalone/wiki/Tutorials-CPP-Gtest
+#
+
+echo ${PROJECT_DIR:=${PWD}}
+
+java -jar $PROJECT_DIR/tca-standalone-0.0.8.jar \
+ --outdir=$PROJECT_DIR/tcm-output \
+ --config=$PROJECT_DIR/.ahub/tcchecker-tca/config.yaml \
+ --local=$PROJECT_DIR/src \
+ --logfile=$PROJECT_DIR/tcm-output/tcm.log \
+ --debug
# TFLiteModelVerification $1 $2 $3
# Run ./tests/scripts/test-driver.sh script verification test
#
-# Unittests $1 $2 $3
-# Run ./tests/scripts/test-driver.sh script unittest
+# NNAPIGTest $1 $2 $3
+# Run [INSTALL_PATH]/test/onert-test unittest command for nnapi gtest
#
# NNPackageTest $1 $2
-# Run ./tests/scripts/nnpkg_test.sh script nnpackage test
+# Run [INSTALL_PATH]/test/onert-test nnpkg-test command
CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_PATH="$(cd ${CURRENT_PATH}/../../ && pwd)"
+# Install path on CI
+INSTALL_PATH=$ROOT_PATH/Product/out
+
function CheckTestPrepared()
{
# Model download server setting
export BACKENDS=$1
if [[ "$2" == "" ]]; then
- ./tests/scripts/test-driver.sh \
- --reportdir=$ROOT_PATH/$3 \
- --verification \
- .
+ $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
+ --reportdir=$ROOT_PATH/$3
else
- ./tests/scripts/test-driver.sh \
- --frameworktest_list_file=$2 \
- --reportdir=$ROOT_PATH/$3 \
- --verification \
- .
+ $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
+ --list=$2 \
+ --reportdir=$ROOT_PATH/$3
fi
unset BACKENDS
}
# $1: (required) backend
-# $2: (required) unittest skiplist file relative path from nnfw root directory
+# $2: (required) nnapi gtest skiplist file relative path from nnfw root directory
# pass empty string if there is no test list
# $3: (required) relative path for report from nnfw root directory
-function Unittests()
+function NNAPIGTest()
{
[[ $# -ne 3 ]] && echo "Invalid function argument setting" && exit 1
# Backup original nnapi_gtest.skip
# TODO Pass skiplist to test-driver.sh
- SKIPLIST_FILE="${ROOT_PATH}/Product/out/unittest/nnapi_gtest.skip"
+ SKIPLIST_FILE="${INSTALL_PATH}/unittest/nnapi_gtest.skip"
BACKUP_FILE="${SKIPLIST_FILE}.backup"
if [[ "$2" != "" ]]; then
cp ${SKIPLIST_FILE} ${BACKUP_FILE}
fi
export BACKENDS=$1
- ./tests/scripts/test-driver.sh \
+ $INSTALL_PATH/test/onert-test unittest \
--reportdir=$ROOT_PATH/$3 \
- --unittest \
- .
+ --unittestdir=$INSTALL_PATH/unittest
unset BACKENDS
# TODO Pass skiplist to test-driver.sh
do
for entry in "nnpkg-tcs"/$f; do
if [ -e $entry ]; then
- BACKENDS="$1" tests/scripts/nnpkg_test.sh -d -i nnpkg-tcs $(basename "$entry")
+ BACKENDS="$1" $INSTALL_PATH/test/onert-test nnpkg-test -d -i nnpkg-tcs $(basename "$entry")
fi
done
EXITCODE_F=$?
export BACKENDS=$1
if [[ "$2" == "" ]]; then
- ./tests/scripts/test-driver.sh \
- --frameworktest \
- --framework_driverbin="$ROOT_PATH/Product/out/bin/tflite_loader_test_tool" \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
--reportdir=$ROOT_PATH/$3
- .
else
- ./tests/scripts/test-driver.sh \
- --frameworktest \
- --framework_driverbin="$ROOT_PATH/Product/out/bin/tflite_loader_test_tool" \
- --frameworktest_list_file=tests/scripts/list/tflite_loader_list.${TEST_ARCH}.txt \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
+ --list=$2 \
--reportdir=$ROOT_PATH/$3
fi
unset BACKENDS
DEBUG_BUILD_ITEMS+=";oops;pepper-assert"
DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
-DEBUG_BUILD_ITEMS+=";foder;souschef;arser"
+DEBUG_BUILD_ITEMS+=";foder;souschef;arser;vconone"
DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite"
DEBUG_BUILD_ITEMS+=";tflite2circle"
DEBUG_BUILD_ITEMS+=";luci"
ROOT_PATH="$CURRENT_PATH/../../"
# prepare rootfs
-if [ ! -d $ROOTFS_DIR ]; then
+if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
echo "It will use default rootfs path"
else
DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
ROOT_PATH="$CURRENT_PATH/../../"
# prepare rootfs
-if [ ! -d $ROOTFS_DIR ]; then
+if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
echo "It will use default rootfs path"
else
DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
ROOT_PATH="$CURRENT_PATH/../../"
# prepare rootfs
-if [ ! -d $ROOTFS_DIR ]; then
+if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
echo "It will use default rootfs path"
else
DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
ROOT_PATH="$CURRENT_PATH/../../"
# prepare rootfs
-if [ ! -d $ROOTFS_DIR ]; then
+if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
echo "It will use default rootfs path"
else
DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
mkdir -p ${NNCC_INSTALL_PREFIX}
./nncc docker-run ./nnas create-package --prefix "${PWD}/${NNCC_INSTALL_PREFIX}" -- "${CONFIG_OPTIONS}"
+# create python virtual environment
+./nncc docker-run python3 -m venv "${NNCC_INSTALL_PREFIX}/bin/venv"
+
+./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
+ -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhosted.org \
+ install -U pip setuptools
+./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
+ -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhosted.org \
+ install tensorflow-cpu==2.3.0
+
mkdir -p ${ARCHIVE_PATH}
-tar -zcf ${ARCHIVE_PATH}/nncc-package.tar.gz -C ${NNCC_INSTALL_PREFIX} ./
+tar -zcf ${ARCHIVE_PATH}/nncc-package.tar.gz -C ${NNCC_INSTALL_PREFIX} --exclude "bin/venv" ./
+tar -zcf ${ARCHIVE_PATH}/nncc-venv-package.tar.gz -C ${NNCC_INSTALL_PREFIX} bin/venv
popd > /dev/null
ROOT_PATH="$CURRENT_PATH/../../"
# prepare rootfs
-if [ ! -d $ROOTFS_DIR ]; then
+if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
echo "It will use default rootfs path"
else
DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
REQUIRED_UNITS=()
# Common Libraries
REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
-REQUIRED_UNITS+=("oops" "safemain" "foder" "arser" "oops")
+REQUIRED_UNITS+=("oops" "safemain" "foder" "arser" "vconone")
# Hermes Logging Framework
REQUIRED_UNITS+=("hermes" "hermes-std")
# loco IR and related utilities
for BACKEND in "${BACKENDS[@]}";
do
- NNPackageTest ${BACKEND} "tests/scripts/list/nnpkg_test_list.armv7l-linux.${BACKEND}"
+ NNPackageTest ${BACKEND} "Product/out/test/list/nnpkg_test_list.armv7l-linux.${BACKEND}"
done
# Interpreter test
export DISABLE_COMPILE=1
-NNPackageTest "interp" "tests/scripts/list/nnpkg_test_list.noarch.interp"
+NNPackageTest "interp" "Product/out/test/list/nnpkg_test_list.noarch.interp"
unset DISABLE_COMPILE
TENSOR_LOGGING=trace_log.txt ONERT_LOG_ENABLE=1 GRAPH_DOT_DUMP=1 ./infra/scripts/test_ubuntu_runtime_mixed.sh
# Enable trace event (acl_cl default backend)
export TRACE_FILEPATH=trace.json
-TFLiteModelVerification "acl_cl" "tests/scripts/list/frameworktest_list.armv7l.acl_cl.txt" "report/acl_cl/trace"
+TFLiteModelVerification "acl_cl" "Product/out/test/list/frameworktest_list.armv7l.acl_cl.txt" "report/acl_cl/trace"
unset TRACE_FILEPATH
# Interpreter
fi
UNITTEST_SKIPLIST="Product/out/unittest/nnapi_gtest.skip.${TEST_PLATFORM}.${BACKEND}"
-FRAMEWORK_TESTLIST="tests/scripts/list/frameworktest_list.${TEST_ARCH}.${BACKEND}.txt"
+FRAMEWORK_TESTLIST="Product/out/test/list/frameworktest_list.${TEST_ARCH}.${BACKEND}.txt"
REPORT_BASE="report/${BACKEND}"
EXECUTORS=("Linear" "Dataflow" "Parallel")
export EXECUTOR="${EXECUTOR}"
fi
- Unittests "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_PATH}"
+ NNAPIGTest "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_PATH}"
TFLiteModelVerification "${BACKEND}" "${FRAMEWORK_TESTLIST}" "${REPORT_PATH}"
if [ $EXECUTOR = "Interpreter" ]; then
# Current support acl_cl backend testlist only
# TODO Support more backends
-TFLITE_LOADER_TESTLIST="tests/scripts/list/tflite_loader_list.${TEST_ARCH}.txt"
+TFLITE_LOADER_TESTLIST="Product/out/test/list/tflite_loader_list.${TEST_ARCH}.txt"
if [[ $TFLITE_LOADER = "1" ]]; then
TFLiteLoaderTest "${BACKEND}" "${TFLITE_LOADER_TESTLIST}" "${REPORT_BASE}/loader/${EXECUTOR}"
-
- # Test custom op
- pushd ${ROOT_PATH} > /dev/null
- ./Product/out/tests/FillFrom_runner
- popd > /dev/null
fi
# This test requires test model installation
pushd ${ROOT_PATH} > /dev/null
-echo
-echo "==== Run nnfw_api_gtest begin ===="
-echo
-NNFW_API_TEST_MODEL_INSTALLER=tests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh
-TEST_BIN=Product/out/unittest_standalone/nnfw_api_gtest
-$NNFW_API_TEST_MODEL_INSTALLER --install-dir ${TEST_BIN}_models
-${TEST_BIN}
-echo
-echo "==== Run nnfw_api_gtest end ===="
-echo
+echo ""
+echo "==== Run standalone unittest begin ===="
+echo ""
+Product/out/test/onert-test prepare-model --model=nnpackage
+Product/out/test/onert-test unittest --unittestdir=Product/out/unittest_standalone
+echo ""
+echo "==== Run standalone unittest end ===="
+echo ""
+
+# Test custom op
+pushd ${ROOT_PATH} > /dev/null
+./Product/out/test/FillFrom_runner
popd > /dev/null
-Product/out/unittest_standalone/test_compute
-Product/out/unittest_standalone/test_onert
-Product/out/unittest_standalone/test_onert_backend_cpu_common
-Product/out/unittest_standalone/test_onert_frontend_nnapi
-Product/out/unittest_standalone/tflite_test
-
-pushd ${ROOT_PATH}
-
# NOTE Fixed backend assignment by type of operation
# TODO Enhance this with randomized test
BACKENDS=(acl_cl acl_neon cpu)
# Get the intersect of framework test list files
-TESTLIST_PREFIX="tests/scripts/list/frameworktest_list.${TEST_ARCH}"
+TESTLIST_PREFIX="Product/out/test/list/frameworktest_list.${TEST_ARCH}"
SKIPLIST_PREFIX="Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}"
sort $TESTLIST_PREFIX.${BACKENDS[0]}.txt > $TESTLIST_PREFIX.intersect.txt
sort $SKIPLIST_PREFIX.${BACKENDS[0]} > $SKIPLIST_PREFIX.union
export OP_BACKEND_MaxPool2D="acl_cl"
export OP_BACKEND_AvgPool2D="acl_neon"
export ACL_LAYOUT="NCHW"
-Unittests "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
+NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.txt" "report/mixed"
{
# download tflite model files
pushd $HOST_HOME
- tests/scripts/framework/run_test.sh --download=on
+ tests/scripts/models/run_test.sh --download=on --run=off
# TODO Since this command removes model file(.zip),
# We must always download the file unlike model file(.tflite).
# Because caching applies only to tflite file.
find tests -name "*.zip" -exec rm {} \;
- tar -zcf cache.tar.gz tests/scripts/framework/cache
+ tar -zcf cache.tar.gz -C tests/scripts/models cache
$SDB_CMD push cache.tar.gz $TEST_ROOT/.
rm -rf cache.tar.gz
- $SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT
+ $SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT/Product/out/test/models
# download api test model file for nnfw_api_gtest
MODEL_CACHE_DIR=$(mktemp -d)
- tests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh --install-dir $MODEL_CACHE_DIR
+ tests/scripts/models/run_test.sh --download=on --run=off \
+ --configdir=test/scripts/nnfw_api_gtest/models \
+ --cachedir=$MODEL_CACHE_DIR
tar -zcf $MODEL_CACHE_DIR/api_model_test.tar.gz -C $MODEL_CACHE_DIR .
$SDB_CMD push $MODEL_CACHE_DIR/api_model_test.tar.gz $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/
$SDB_CMD shell tar -zxf $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/api_model_test.tar.gz \
rm -rf ${GCOV_DIR}/*
pushd ${GCOV_DIR}
- sdb pull ${TEST_ROOT}/tests/scripts/build_path.txt
+ sdb pull ${TEST_ROOT}/Product/out/test/build_path.txt
SRC_PREFIX=`cat build_path.txt`
GCOV_PREFIX_STRIP=`echo "${SRC_PREFIX}" | grep -o '/' | wc -l`
GCOV_DATA_PATH="/opt/usr/nnfw-gcov"
```
The structures and relevant APIs are defined in nnfw APIs.
-Please see `nnfw_dev.h` for detail.
+Please see `nnfw_experimental.h` for details.
You can find example in `nnfw` repository.
Name: nnfw
Summary: nnfw
-Version: 1.7.0
+Version: 1.8.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause
%ifarch %{arm} aarch64
# Require python for acl-ex library build pre-process
BuildRequires: python
-BuildRequires: libarmcl-devel
+BuildRequires: libarmcl-devel >= v20.05
%endif
Requires(post): /sbin/ldconfig
%description plugin-devel
NNFW development package for backend plugin developer
+%package minimal-app
+Summary: Minimal test binary for VD manual test
+
+%description minimal-app
+Minimal test binary for VD manual test
+
%if %{test_build} == 1
%package test
Summary: NNFW Test
%define install_dir %{_prefix}
%define install_path %{buildroot}%{install_dir}
%define build_env NNFW_WORKSPACE=build
-%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENABLE_TEST=off
+%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENABLE_TEST=off -DBUILD_MINIMAL_SAMPLE=on
# Set option for test build (and coverage test build)
%define test_install_home /opt/usr/nnfw-test
%if %{coverage_build} == 1
pwd > tests/scripts/build_path.txt
%endif # coverage_build
-tar -zcf test-suite.tar.gz infra/scripts tests/scripts
+tar -zcf test-suite.tar.gz infra/scripts
%endif # test_build
%endif # arm armv7l aarch64
%ifarch arm armv7l aarch64
mkdir -p %{buildroot}%{_libdir}
+mkdir -p %{buildroot}%{_bindir}
mkdir -p %{buildroot}%{_includedir}
install -m 644 build/out/lib/*.so %{buildroot}%{_libdir}
+install -m 755 build/out/bin/onert-minimal-app %{buildroot}%{_bindir}
cp -r build/out/include/* %{buildroot}%{_includedir}/
# For developer
%if %{test_build} == 1
%{test_build_env} ./nnfw install
# Share test script with ubuntu (ignore error if there is no list for target)
-cp tests/nnapi/nnapi_gtest.skip.* %{buildroot}%{test_install_dir}/unittest/.
+cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{buildroot}%{test_install_dir}/unittest/.
cp %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip
tar -zxf test-suite.tar.gz -C %{buildroot}%{test_install_home}
%if %{coverage_build} == 1
mkdir -p %{buildroot}%{test_install_home}/gcov
find . -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/. \;
+install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/test/build_path.txt
%endif # coverage_build
%endif # test_build
%manifest %{name}.manifest
%defattr(-,root,root,-)
%ifarch arm armv7l aarch64
-%dir %{_includedir}/nnfw
+%dir %{_includedir}/onert
%{_includedir}/onert/*
%{_libdir}/pkgconfig/nnfw-plugin.pc
%endif
+%files minimal-app
+%manifest %{name}.manifest
+%defattr(-,root,root,-)
+%{_bindir}/onert-minimal-app
+
%if %{test_build} == 1
%files test
%manifest %{name}.manifest
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "AveragePool2D"
+ averagepool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 5 dim: 5 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 2 dim: 25 }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 25 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 25 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ version: 2
+ depthwiseconv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ dilation_w_factor: 2
+ dilation_h_factor: 1
+ depth_multiplier: 5
+ activation : RELU6
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
--- /dev/null
+# To check if DEPTHWISE_CONV_2D version is 2
+
+RULE "OP_VERSION_CHECK" $(op_version DEPTHWISE_CONV_2D) '=' 2
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 112 dim: 112 dim: 4 }
+ quant { min: 0 max: 6 scale: 0.0235294 zero_point: 0 }
+}
+operand {
+ name: "ker"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+ quant {
+ min: -30.3175 min: -0.779597 min: -10.2751 min: -10.8594
+ max: 4.35049 max: 2.70807 max: 11.0269 max: 20.97
+ scale:0.135953 scale: 0.0136771 scale: 0.0835375 scale: 0.124821
+ zero_point:223 zero_point: 57 zero_point: 123 zero_point: 87
+ quantized_dimension: 3
+ }
+}
+operand {
+ name: "bias"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0"
+ arg: "1.0"
+ }
+ quant {
+ scale: 1.4758e-16 scale: 3.15185e-05 scale: 2.20685e-05 scale: 1.72205e-16
+ zero_point: 0 zero_point: 0 zero_point: 0 zero_point: 0
+ }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 112 dim: 112 dim: 4 }
+ quant { min: 0 max: 6 scale: 0.0235294 zero_point: 0 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation : RELU6
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
--- /dev/null
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 16 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "-2" arg: "-3" arg: "4"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 2 scale: 0.0078125 zero_point: 128}
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 2 scale: 0.0078125 zero_point: 128}
+}
+operation {
+ type: "L2Normalize"
+ l2norm_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ output: "ofm"
+}
+input: "ifm1"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: 0 max: 1 scale: 0.00390625 zero_point: -128 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: 0 max: 1 scale: 0.00390625 zero_point: -128 }
+}
+operation {
+ type: "Logistic"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "Const_transposed"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 1
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "FusedBatchNormV3"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "FusedBatchNormV3_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "2.00834"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 1
+ }
+ quant {
+ min: 0
+ max: 255
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "Const_transposed"
+ input: "Hole"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+}
+operation {
+ type: "Mul"
+ input: "conv2d_transpose"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3_add_param"
+ output: "FusedBatchNormV3"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Hole"
+output: "FusedBatchNormV3"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler {
+ tag: "constant" arg: "16" arg: "16"
+ }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "ResizeBilinear"
+ input: "ifm1"
+ input: "size"
+ output: "ofm"
+ resize_bilinear_options {
+ align_corners: false
+ half_pixel_centers: false
+ }
+}
+input: "ifm1"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 2 dim: 2 dim: 12 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "SpaceToDepth"
+ space_to_depth_options {
+ block_size: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
operand {
name: "ker"
type: FLOAT32
- shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+ shape { dim: 3 dim: 1 dim: 1 dim: 3 }
filler {
tag: "gaussian"
arg: "0.0"
--- /dev/null
+operand {
+ name: "out_shape"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "4" arg: "4" arg: "1"
+ }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4" arg: "5" arg: "-6"
+ arg: "7" arg: "8" arg: "-9" arg: "-10" arg: "11" arg: "-12"
+ arg: "13" arg: "14" arg: "-15" arg: "-16" arg: "17" arg: "-18"
+ }
+}
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+}
+
+operation {
+ type: "TransposeConv"
+ transpose_conv_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "out_shape"
+ input: "ker"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT32
+ shape { dim: 4 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT32
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT64
+ shape { dim: 4 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT64
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+operand {
+ name: "ifm"
+ type: INT32
+ shape { dim: 5 }
+}
+operand {
+ name: "ofm"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT32
+ shape { dim: 5 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT32
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+operand {
+ name: "ifm"
+ type: INT32
+ shape { dim: 5 }
+}
+operand {
+ name: "ofm"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT64
+ shape { dim: 5 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT64
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 4 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT32
+ shape { dim: 4 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT32
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 5 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT64
+ shape { dim: 5 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT64
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
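+
+// Illustrative example: with scale = 0.5 and zero_point = 10, a stored value
+// q = 14 dequantizes to f = 0.5 * (14 - 10) = 2.0.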
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+// compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
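+
+// Illustrative example: a 2x3 matrix [[1, 0, 2], [0, 0, 3]] with d0 DENSE and
+// d1 SPARSE_CSR is described by dense_size = 2 for d0, array_segments = [0, 2, 3]
+// and array_indices = [0, 2, 2] for d1, with only the values [1, 2, 3] stored.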
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126
+}
+
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for non
+ // constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adj_x:bool;
+ adj_y:bool;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+}
+
+root_type Model;
2.1.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.1.0/tensorflow/lite/schema/schema.fbs
2.2.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.2.0/tensorflow/lite/schema/schema.fbs
2.3.0-rc0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0-rc0/tensorflow/lite/schema/schema.fbs
+2.3.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0/tensorflow/lite/schema/schema.fbs
--- /dev/null
+import tensorflow as tf
+
+i = tf.constant(0, shape=[1, 0], dtype=tf.int32, name='i')
+x = tf.compat.v1.placeholder(shape=[1, 1], dtype=tf.int32, name='Hole')
+
+c = lambda i: tf.compat.v1.less(tf.compat.v1.size(i[0]), 10)
+b = lambda i: tf.concat([i, x], axis=1)
+
+# this loop changes i's shape from [1, 0] -> [1, 1] -> [1, 2] -> ... -> [1, 10]
+r = tf.compat.v1.while_loop(
+ c, b, [i], name="While", shape_invariants=[tf.TensorShape([1, None])])
+
+output = tf.compat.v1.identity(r, name="Output")
+
+# by adding the following code, [[1 1 1 1 1 1 1 1 1 1]] and (1, 10) will be printed
+#
+# import numpy as np
+# x_val = np.array([[1]])
+# with tf.compat.v1.Session() as sess:
+# result = sess.run(r, feed_dict={x:x_val})
+# print(result)
+# print(result.shape)
+
+# with TF 2.3, tf2tflite throws the following error
+#
+# Exception: venv/tf-2.3/lib/python3.6/site-packages/tensorflow/python/eager/lift_to_graph.py:339:0:
+# error: body function result type tensor<1x1xi32> is incompatible with result type tensor<1x0xi32>
+# at index 0
+# ...
+# note: see current operation: %1:2 = "tf.While"(%0, %arg0)
+# {body = @_functionalize_body_00, cond = @_functionalize_cond_00, device = "", is_stateless = false, output_shapes = [], parallel_iterations = 10 : i64}
+# : (tensor<1x0xi32>, tensor<1x1xi32>) -> (tensor<1x0xi32>, tensor<1x1xi32>)
--- /dev/null
+import tensorflow as tf
+
+x = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole')
+i = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole_2')
+
+
+def c(ii):
+ rs = tf.compat.v1.shape(ii)
+ r1 = rs[1]
+ return tf.compat.v1.less(r1, 10)
+
+
+def b(ii):
+ return tf.concat([ii, x], axis=1)
+
+
+# this loop changes i's shape from [1, 0] -> [1, 1] -> [1, 2] -> ... -> [1, 10]
+r = tf.compat.v1.while_loop(
+ c, b, [i], name="While", shape_invariants=[tf.TensorShape([1, None])])
+
+output = tf.compat.v1.identity(r, name="Output")
+
+# by adding the following code, [[123 1 2 3 1 2 3 1 2 3]] and (1, 10) will be printed
+#
+'''
+import numpy as np
+i_val = np.array([[123]], dtype=np.int32)
+x_val = np.array([[1, 2, 3]], dtype=np.int32)
+with tf.compat.v1.Session() as sess:
+ result = sess.run(r, feed_dict={x:x_val, i:i_val})
+ print(result)
+ print(result.shape)
+'''
--- /dev/null
+import tensorflow as tf
+import numpy as np
+
+input_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 2, 2, 1), name="Hole")
+W = np.ones(9).reshape((3, 3, 1, 1))
+filter_ = tf.compat.v1.constant(W, dtype=tf.float32)
+tconv_ = tf.compat.v1.nn.conv2d_transpose(
+ input_, filter_, output_shape=(1, 4, 4, 1), strides=[1, 1, 1, 1], padding='VALID')
+
+scale_ = tf.compat.v1.constant([1.0177339315414429], dtype=tf.float32)
+offset_ = tf.compat.v1.constant([0.015628524124622345], dtype=tf.float32)
+mean_ = tf.compat.v1.constant([1.027155211195349693], dtype=tf.float32)
+variance_ = tf.compat.v1.constant([0.25580066442489624], dtype=tf.float32)
+bn_out, _, _ = tf.compat.v1.nn.fused_batch_norm(
+ tconv_,
+ scale_,
+ offset_,
+ mean=mean_,
+ variance=variance_,
+ epsilon=0.0010000000474974513,
+ is_training=False)
+'''
+python ../../compiler/tf2tfliteV2/tf2tfliteV2.py --v1 \
+-i tconv-bn.pbtxt \
+-o tconv-bn.tflite \
+-I Hole -O FusedBatchNorm
+'''
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.7.0"
+ versionName "1.8.0"
externalNativeBuild {
ndkBuild {
EXT_ACL_FOLDER=/home/hanjoung/ws/temp/arm_compute-v19.05-bin-android/lib/android-arm64-v8a-neon-cl \
ANDROID_BUILD_TOOLS_DIR=/home/hanjoung/ws/android-tools/sdk/build-tools/27.0.3/ \
ANDROID_SDK_DIR=/home/hanjoung/ws/android-tools/sdk \
- TFLITE_MODEL_PATH=/Users/hanjoung/ws/ghent/STAR/nnfw/tests/scripts/framework/cache/MODELS/mobilenet/mobilenet_v1_0.25_128.tflite \
+ TFLITE_MODEL_PATH=/Users/hanjoung/ws/ghent/STAR/nnfw/tests/scripts/models/cache/MODELS/mobilenet/mobilenet_v1_0.25_128.tflite \
ANDROID_BOOST_ROOT=/home/hanjoung/ws/gh/moritz-wundke/Boost-for-Android/build/out/arm64-v8a
```
file(GLOB_RECURSE SOURCES "src/*.cpp")
-add_library(nnfw_lib_benchmark SHARED ${SOURCES})
+add_library(nnfw_lib_benchmark STATIC ${SOURCES})
target_include_directories(nnfw_lib_benchmark PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(nnfw_lib_benchmark PRIVATE ${LIB_PTHREAD})
-install(TARGETS nnfw_lib_benchmark DESTINATION lib)
if (option.memory)
{
print_memory = true;
- for (int i = PhaseEnum::MODEL_LOAD; i <= PhaseEnum::EXECUTE; ++i)
+ for (int i = PhaseEnum::MODEL_LOAD; i < PhaseEnum::EXECUTE; ++i)
{
auto phase = phases.at(gPhaseStrings[i]);
for (int j = MemoryType::RSS; j <= MemoryType::PSS; ++j)
template <typename DstType, typename SrcType> inline DstType polymorphic_downcast(SrcType *x)
{
+#ifndef __ANDROID__
assert(dynamic_cast<DstType>(x) == x);
+#endif
return static_cast<DstType>(x);
}
* Outputs:
* * 0: The sum, a tensor of the same type as input0.
*/
- ANEURALNETWORKS_ADDV2_EX = 50039
+ ANEURALNETWORKS_ADDV2_EX = 50039,
+
+ ANEURALNETWORKS_STATELESS_RANDOM_UNIFORM_EX = 50040,
+
+ /** Splits a tensor value into a list of sub tensors.
+ *
+ * Supported tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32, ANEURALNETWORKS_TENSOR_INT32}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: A tensor to split.
+ * * 1: A tensor containing the sizes of each output tensor along split_dim
+ * * 2: The dimension along which to split
+ *
+ * Outputs:
+ * * 0: Tensor objects resulting from splitting value.
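+ * For example, splitting a tensor of shape [4, 6] along dimension 1 with
+ * sizes [2, 4] produces outputs of shape [4, 2] and [4, 4].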
+ */
+ ANEURALNETWORKS_SPLIT_V_EX = 50041
} OperationCodeEx; // extends OperationCode
add_library(${ONERT_DEV} SHARED ${API_SRC})
# Public headers to publish
-# nnfw_debug.h is header for runtime developer, so it will not be installed
-# But runtime developer can use nnfw_debug.h by linking nnfw-dev
-set(NNFW_API_HEADERS include/nnfw.h include/nnfw_dev.h)
+# nnfw_internal.h is header for runtime developer, so it will not be installed
+# But runtime developer can use nnfw_internal.h by linking nnfw-dev
+set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h)
target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header)
target_link_libraries(${ONERT_DEV} PUBLIC onert_core)
NNFW_STATUS_ERROR = 1,
/** Unexpected null argument is given. */
NNFW_STATUS_UNEXPECTED_NULL = 2,
+ /** When a function was called but it is not valid for the current session state. */
+ NNFW_STATUS_INVALID_STATE = 3,
+ /** When it is out of memory */
+ NNFW_STATUS_OUT_OF_MEMORY = 4,
} NNFW_STATUS;
/**
*
* <p>Supported backends differ on each platform.
* For example, `x86_64` supports "cpu" only.
- * Can set multiple backends by semicolon (ex: "acl_cl;cpu").
- * Among the multiple backends, the 1st element is used as default backend.</p>
- *
- * @note Possible backend strings are: "cpu", "acl_cl", "acl_neon", "srcn"
+ * Multiple backends can be set and they must be separated by a semicolon (ex: "acl_cl;cpu").
+ * For each backend string, `libbackend_{backend}.so` will be dynamically loaded during
+ * {@link nnfw_prepare}.
+ * Among the multiple backends, the 1st element is used as the default backend.</p>
*
* @param[in] session session to which available backends are set
* @param[in] backends available backends for nnfw to use
*
* This function should be called before {@link nnfw_prepare} is invoked.
*
- * <p>Supported backends differs on each platforms.
- * For example, `x86_64` supports "cpu" only.
- * The backend for op has higher priority than available backends specified by
- * nnfw_set_available_backends.</p>
+ * <p>The backend for op has higher priority than available backends specified by
+ * {@link nnfw_set_available_backends}.</p>
*
- * @note Possible backend strings are: "cpu", "acl_cl", "acl_neon"
+ * @deprecated Deprecated since 1.8.0.
*
* @param[in] session session to be modified
* @param[in] op operation to be set
* limitations under the License.
*/
-#ifndef __NNFW_DEV_H__
-#define __NNFW_DEV_H__
+#ifndef __NNFW_EXPERIMENTAL_H__
+#define __NNFW_EXPERIMENTAL_H__
#include "nnfw.h"
NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id,
custom_kernel_registration_info *info);
-#endif // __NNFW_DEV_H__
+#endif // __NNFW_EXPERIMENTAL_H__
* limitations under the License.
*/
-#ifndef __NNFW_DEBUG_H__
-#define __NNFW_DEBUG_H__
+#ifndef __NNFW_INTERNAL_H__
+#define __NNFW_INTERNAL_H__
#include "nnfw.h"
NNFW_STATUS nnfw_get_config(nnfw_session *session, const char *key, char *value, size_t value_size);
-#endif // __NNFW_DEBUG_H__
+/**
+ * @brief Load a circle model from buffer.
+ *
+ * The buffer must outlive the session.
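+ *
+ * A typical call order (illustrative) is nnfw_create_session(), then this function
+ * in place of nnfw_load_model_from_file(), followed by nnfw_prepare() and nnfw_run().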
+ *
+ * @param[in] session session
+ * @param[in] buffer Pointer to the buffer
+ * @param[in] size Buffer size
+ * @return NNFW_STATUS
+ */
+NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, size_t size);
+
+#endif // __NNFW_INTERNAL_H__
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
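* (for example, 0x01000800 corresponds to version 1.8.0)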
*/
-#define NNFW_VERSION 0x01000700
+#define NNFW_VERSION 0x01000800
#endif // __NNFW_VERSION_H__
#ifndef __ONERT_BACKEND_CUSTOM_KERNEL_H__
#define __ONERT_BACKEND_CUSTOM_KERNEL_H__
-#include "nnfw_dev.h"
+#include "nnfw_experimental.h"
#include "backend/CustomKernelBuilder.h"
#include "exec/IFunction.h"
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_NO_ERROR, 0);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_ERROR, 1);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_UNEXPECTED_NULL, 2);
+STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INVALID_STATE, 3);
+STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_OUT_OF_MEMORY, 4);
STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_NONE, 0);
STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_CHANNELS_LAST, 1);
{
NNFW_RETURN_ERROR_IF_NULL(session);
- *session = new nnfw_session();
-
+ *session = new (std::nothrow) nnfw_session();
+ if (*session == nullptr)
+ return NNFW_STATUS_OUT_OF_MEMORY;
return NNFW_STATUS_NO_ERROR;
}
// It should not be reached.
return NNFW_STATUS_ERROR;
}
+
+NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, size_t size)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->load_circle_from_buffer(buffer, size);
+}
nnfw_session::~nnfw_session() = default;
-NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
+NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
{
if (!isStateInitialized())
+ return NNFW_STATUS_INVALID_STATE;
+
+ if (!buffer)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+
+ if (size == 0)
return NNFW_STATUS_ERROR;
+ _subgraphs = onert::circle_loader::loadModel(buffer, size);
+ _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
+
+ _state = State::MODEL_LOADED;
+ return NNFW_STATUS_NO_ERROR;
+}
+
+NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
+{
+ if (!isStateInitialized())
+ return NNFW_STATUS_INVALID_STATE;
+
if (!package_dir)
{
std::cerr << "package_dir is null." << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_UNEXPECTED_NULL;
}
if (!null_terminating(package_dir, MAX_PATH_LENGTH))
std::cerr << "invalid state";
}
std::cerr << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
if (!_subgraphs || !primary_subgraph() || primary_subgraph()->isBuildingPhase())
{
std::cerr << "Error during nnfw_session::run : "
<< "run should be run after prepare" << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
try
{
std::cerr << "Error during nnfw_session::run_async : "
<< "run_async should be run after prepare" << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
_execution->startExecute();
if (!isStatePreparedOrFinishedRun())
{
std::cerr << "Error during nnfw_session::set_input : invalid state" << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
if (!buffer && length != 0)
if (!isStatePreparedOrFinishedRun())
{
std::cerr << "Error during nnfw_session::set_output : invalid state" << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
if (!buffer && length != 0)
NNFW_STATUS nnfw_session::input_size(uint32_t *number)
{
if (isStateInitialized()) // Model is not loaded
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
try
{
if (number == nullptr)
{
std::cerr << "Error during nnfw_session::input_size, number is null pointer." << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_UNEXPECTED_NULL;
}
*number = primary_subgraph()->getInputs().size();
}
NNFW_STATUS nnfw_session::output_size(uint32_t *number)
{
if (isStateInitialized()) // Model is not loaded
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
try
{
if (number == nullptr)
{
std::cerr << "Error during nnfw_session::output_size, number is null pointer." << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_UNEXPECTED_NULL;
}
*number = primary_subgraph()->getOutputs().size();
}
{
std::cerr << "Error during set_input_tensorinfo : should be run after load_model"
<< std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
if (ti.rank <= 0 || ti.rank > NNFW_MAX_RANK)
NNFW_STATUS nnfw_session::input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
{
+ if (isStateInitialized())
+ return NNFW_STATUS_INVALID_STATE;
+
try
{
if (ti == nullptr)
{
std::cerr << "Error during nnfw_session::input_tensorinfo, tensorinfo is null pointer."
<< std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_UNEXPECTED_NULL;
}
if (index >= primary_subgraph()->getInputs().size())
{
NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
{
if (isStateInitialized())
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
if (ti == nullptr)
{
std::cerr << "Error during nnfw_session::output_tensorinfo, tensorinfo is null pointer."
<< std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_UNEXPECTED_NULL;
}
if (index >= primary_subgraph()->getOutputs().size())
NNFW_STATUS nnfw_session::set_available_backends(const char *backends)
{
if (!isStateModelLoaded())
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
try
{
- if (!backends || null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false)
- {
+ if (!backends)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ if (null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false)
return NNFW_STATUS_ERROR;
- }
auto &options = _compiler->options();
NNFW_STATUS nnfw_session::set_op_backend(const char *op, const char *backend)
{
if (!isStateModelLoaded())
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
try
{
- if (!op || !null_terminating(op, MAX_OP_NAME_LENGTH) || !backend ||
+ if (!op || !backend)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ if (!null_terminating(op, MAX_OP_NAME_LENGTH) ||
!null_terminating(backend, MAX_BACKEND_NAME_LENGTH))
- {
return NNFW_STATUS_ERROR;
- }
auto key = get_op_backend_string(op);
NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
{
if (!isStateModelLoaded())
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
+
+ if (!key || !value)
+ return NNFW_STATUS_UNEXPECTED_NULL;
auto &options = _compiler->options();
NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_size)
{
if (!isStateModelLoaded())
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
+
+ if (!key || !value)
+ return NNFW_STATUS_UNEXPECTED_NULL;
auto &options = _compiler->options();
#define __API_NNFW_API_INTERNAL_H__
#include "nnfw.h"
-#include "nnfw_dev.h"
+#include "nnfw_experimental.h"
#include <util/GeneralConfigSource.h>
NNFW_STATUS set_available_backends(const char *backends);
NNFW_STATUS set_op_backend(const char *op, const char *backend);
+ //
+ // Internal-only API
+ //
+
NNFW_STATUS set_config(const char *key, const char *value);
NNFW_STATUS get_config(const char *key, char *value, size_t value_size);
+ NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size);
+
private:
onert::ir::Graph *primary_subgraph();
bool isStateInitialized();
#include "exec/FunctionSequence.h"
#include "util/logging.h"
#include "util/Utils.h"
+#include "AclKernelGen.h"
namespace onert
{
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
assert(_ctx.at(block_size_index).data());
auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8
- ? arm_compute::SubDataType::BOOL
- : arm_compute::SubDataType::NONE;
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLCast>();
+ std::unique_ptr<::arm_compute::IFunction> fn;
+ if (ifm_tensor->data_type() == ofm_tensor->data_type())
+ {
+ auto l = std::make_unique<::arm_compute::CLCopy>();
+
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+
+ fn = std::move(l);
+ }
+ else
+ {
+ auto l = std::make_unique<::arm_compute::CLCast>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type);
+ // TODO Support converting float to int32 as round down
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+
+ fn = std::move(l);
+ }
auto acl_fn = asAclClFunction(std::move(fn));
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
- conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
+ ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclClFunction(std::move(fn));
}
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, multiplier, act_info);
_return_fn = asAclClFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
- ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
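
The pooling visitors (MaxPool2D here, AvgPool2D and L2Pool2D below) now delegate their shared boilerplate to acl_common::kernelGenPool2D and only pass the pooling type. For orientation, a minimal sketch of a helper with that shape follows; it simply re-packages the code removed from these visitors, and the template and parameter names are assumptions based on the call sites, not the actual acl_common implementation.

// Hypothetical sketch of a shared Pool2D generator (not the real onert helper).
template <typename T_ACLLayer, typename T_Node, typename T_Context, typename T_TensorBuilder>
std::unique_ptr<::arm_compute::IFunction>
kernelGenPool2D(const T_Node &node, const T_Context &ctx,
                const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout,
                ::arm_compute::PoolingType pooling_type)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(0)}; // INPUT

  const auto ofm_shape = ctx.at(ofm_index).shape().asFeature(layout);
  const auto ifm_shape = ctx.at(ifm_index).shape().asFeature(layout);

  const auto kh = node.param().kh;
  const auto kw = node.param().kw;
  const auto stride = node.param().stride;
  const auto padding =
      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);

  auto ofm_tensor = tensor_builder->at(ofm_index).get();
  auto ifm_tensor = tensor_builder->at(ifm_index).get();

  ::arm_compute::PoolingLayerInfo info{pooling_type, ::arm_compute::Size2D{kw, kh},
                                       acl_common::asPadStrideInfo(padding, stride),
                                       true /* exclude_padding, as in the removed AvgPool2D code */};

  auto fn = std::make_unique<T_ACLLayer>();
  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info);
  return fn;
}

With a helper of this shape, each visitor only wraps the returned raw function with asAclClFunction and appends the fused activation, as the new code above and below does.
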
void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_alloc = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_builder->at(ofm_index).get();
std::vector<::arm_compute::ICLTensor *> input_tensors;
for (auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
if (input_indexes.size() < 2)
{
auto l = std::make_unique<::arm_compute::CLCopy>();
- l->configure(input_tensors.at(0), output_alloc->handle());
+ l->configure(input_tensors.at(0), output_tensor->handle());
fn = std::move(l);
}
else
auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_alloc->handle(), fixed_axis);
+ l->configure(input_tensors, output_tensor->handle(), fixed_axis);
fn = std::move(l);
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
- using ir::operation::FullyConnected;
-
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
- const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
-
- const auto input_rank = _ctx.at(input_index).shape().rank();
-
- const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- UNUSED_RELEASE(output_size);
- assert(_ctx.at(bias_index).shape().dim(0) == output_size);
- assert(_ctx.at(weight_index).shape().dim(0) == output_size);
- const auto batch_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2);
- const auto input_size =
- _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1);
-
- // Check for reshaping input's shape into rank-2
- bool needs_reshape = false;
- ir::Shape reshape(2);
- if (input_rank == 3 || input_rank == 4)
- {
- const auto &ifm_shape = _ctx.at(input_index).shape();
- auto feature_size = 1;
- for (int i = 0; i < ifm_shape.rank(); ++i)
- {
- feature_size *= ifm_shape.dim(i);
- }
-
- UNUSED_RELEASE(feature_size);
- assert(feature_size == batch_size * input_size);
-
- // for reshaping
- needs_reshape = true;
- reshape.dim(0) = batch_size; /* H */
- reshape.dim(1) = input_size; /* W */
- }
-
+ auto output_tensor = _tensor_builder->at(output_index).get();
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- const auto input_alloc = _tensor_builder->at(input_index).get();
- const auto weight_alloc = _tensor_builder->at(weight_index).get();
- const auto bias_alloc = _tensor_builder->at(bias_index).get();
- const auto frontend_layout = _current_op_seq_layout;
- const auto acl_layout = output_alloc->handle()->info()->data_layout();
-
- auto fn = std::make_unique<arm_compute::CLFullyConnectedReshapingLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- arm_compute::CLFullyConnectedReshapingLayer::KernelType kernel_type =
- arm_compute::CLFullyConnectedReshapingLayer::KernelType::GENERAL;
- if (_ctx.at(weight_index).isConstant())
- {
- kernel_type = arm_compute::CLFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS;
- assert(_ctx.at(weight_index).data());
- }
- fn->configure(
- input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
- needs_reshape,
- ::onert::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
-
+ auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+ ::arm_compute::CLFullyConnectedReshapingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, output_alloc->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
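
FullyConnected likewise moves its rank-2 reshape decision and kernel-type selection into a shared template, keeping only the fused activation in the visitor. Purely as an illustration of what the removed logic amounts to, a condensed sketch follows; the template parameter list mirrors the call above, but the body and names are assumptions rather than the real acl_common::kernelGenFullyConnected.

// Hypothetical condensed sketch (asserts and error checks of the removed code omitted).
template <typename T_Function, typename T_Tensor, typename T_Layer,
          typename T_Node, typename T_Context, typename T_TensorBuilder>
std::unique_ptr<exec::IFunction>
kernelGenFullyConnected(const T_Node &node, const T_Context &ctx,
                        const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout)
{
  using ir::operation::FullyConnected;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};

  const auto input_rank = ctx.at(input_index).shape().rank();
  const auto batch_size =
      ctx.at(output_index).shape().dim(ctx.at(output_index).shape().rank() - 2);
  const auto input_size =
      ctx.at(weight_index).shape().dim(ctx.at(weight_index).shape().rank() - 1);

  // Rank-3/4 inputs are flattened into a [batch, feature] matrix before the FC layer.
  bool needs_reshape = false;
  ir::Shape reshape(2);
  if (input_rank == 3 || input_rank == 4)
  {
    needs_reshape = true;
    reshape.dim(0) = batch_size; /* H */
    reshape.dim(1) = input_size; /* W */
  }

  auto output_tensor = tensor_builder->at(output_index).get();
  auto input_tensor = tensor_builder->at(input_index).get();
  auto weight_tensor = tensor_builder->at(weight_index).get();
  auto bias_tensor = tensor_builder->at(bias_index).get();

  // Constant weights can be preprocessed once; otherwise fall back to the general kernel.
  auto kernel_type = T_Layer::KernelType::GENERAL;
  if (ctx.at(weight_index).isConstant())
    kernel_type = T_Layer::KernelType::PREPROCESSED_WEIGHTS;

  auto fn =
      std::make_unique<T_Layer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager());
  const auto acl_layout = output_tensor->handle()->info()->data_layout();
  fn->configure(input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(),
                output_tensor->handle(), needs_reshape,
                acl_common::asTensorShape(reshape, layout, acl_common::asRuntimeLayout(acl_layout)),
                kernel_type);
  return std::make_unique<T_Function>(std::move(fn)); // T_Tensor would back weight handling
}
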
void KernelGenerator::visit(const ir::operation::Mul &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto keep_dims{node.param().keep_dims};
const auto reduce_type = node.param().reduce_type;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = input_alloc->layout();
+ const auto backend_layout = input_tensor->layout();
std::unique_ptr<arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
const auto acl_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_alloc->handle(), acl_axes, keep_dims, output_alloc->handle());
+ l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle());
fn = std::move(l);
}
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_alloc->handle(), output_alloc->handle(), acl_axes, keep_dims,
+ l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims,
acl_common::convertReduceType(reduce_type));
fn = std::move(l);
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
// NOTE This operation must not change the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
UNUSED_RELEASE(frontend_layout);
auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
(void)dims;
(void)ndim;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
_return_fn = std::move(acl_fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::CLActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto beta = node.param().beta;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), output_alloc->handle(), beta);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
auto acl_fn = asAclClFunction(std::move(fn));
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
auto fn = std::make_unique<::arm_compute::CLSlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
auto acl_fn = asAclClFunction(std::move(fn));
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
strides_set, begin_mask, end_mask, shrink_axis_mask);
auto acl_fn = asAclClFunction(std::move(fn));
const auto rank = _ctx.at(ifm_idx).shape().rank();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
// Reversed
auto fn = std::make_unique<::arm_compute::CLPermute>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
auto acl_fn = asAclClFunction(std::move(fn));
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Sub &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Div &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
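
Add, Sub, Div and the other visitors with a fused activation all return an exec::FunctionSequence: the arithmetic kernel first, then whatever ActivationBuilder::generate produces for the output tensor. The sketch below shows one plausible shape for that second half, reusing the ActivationLayerInfo values that appear elsewhere in this file for ReLU1/ReLU6; the NopFunction name and the exact dispatch are assumptions, not something this diff confirms.

// Hypothetical sketch of a fused-activation builder (assumed, not the actual ActivationBuilder).
static std::unique_ptr<exec::IFunction> generateFusedActivation(ir::Activation activation,
                                                                ::arm_compute::ICLTensor *ofm)
{
  using ActInfo = ::arm_compute::ActivationLayerInfo;

  ActInfo act_info;
  switch (activation)
  {
    case ir::Activation::NONE:
      return std::make_unique<exec::NopFunction>(); // assumed no-op IFunction
    case ir::Activation::RELU:
      act_info = ActInfo{ActInfo::ActivationFunction::RELU};
      break;
    case ir::Activation::RELU1:
      act_info = ActInfo{ActInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
      break;
    case ir::Activation::RELU6:
      act_info = ActInfo{ActInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
      break;
    default:
      throw std::runtime_error{"Unsupported fused activation"};
  }

  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
  fn->configure(ofm, nullptr, act_info); // run in place on the op's output tensor
  return asAclClFunction(std::move(fn));
}
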
void KernelGenerator::visit(const ir::operation::Exp &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLExpLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto gamma_alloc = _tensor_builder->at(gamma_index).get();
- auto beta_alloc = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto gamma_tensor = _tensor_builder->at(gamma_index).get();
+ auto beta_tensor = _tensor_builder->at(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(),
- beta_alloc->handle(), epsilon);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
+ beta_tensor->handle(), epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Logistic &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
::arm_compute::BinaryLogicalOperation::AND);
auto acl_fn = asAclClFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
- // TODO Support dynamic rnn
- // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- const auto cell_threshold = node.param().cell_threshold;
- const auto projection_threshold = node.param().projection_threshold;
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
- // true: no CIFG
- // false: CIFG
- // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
- // But the cell_to_input_weights does not exist in regular CIFG although peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE Although the projection weights has data the projection bias may not have data.
- bool has_projection_param = has_projection_weights;
-
- const auto activation = node.param().activation;
- const auto cell_clip = cell_threshold;
- const auto projection_clip = projection_threshold;
- assert(cell_clip >= 0.f && projection_clip >= 0.f);
-
- auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get();
- auto output_alloc = _tensor_builder->at(output_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get();
- auto recurrent_to_forget_weights_alloc =
- _tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get();
- auto recurrent_to_output_weights_alloc =
- _tensor_builder->at(recurrent_to_output_weights_index).get();
-
- auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get();
-
- auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = std::make_unique<::arm_compute::CLLSTMLayer>();
-
- ::arm_compute::LSTMParams<::arm_compute::ICLTensor> lstm_params{};
- if (has_cifg_param)
- {
- auto input_to_input_weights_alloc =
- _tensor_builder->at(input_to_input_weights_index).get(); // optional
- auto recurrent_to_input_weights_alloc =
- _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
- auto cell_to_input_weights_handle =
- has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle()
- : nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional
- lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(),
- recurrent_to_input_weights_alloc->handle(),
- cell_to_input_weights_handle, input_gate_bias_alloc->handle());
- }
- if (has_peephole_param)
- {
- auto cell_to_forget_weights_alloc =
- _tensor_builder->at(cell_to_forget_weights_index).get(); // optional
- auto cell_to_output_weights_alloc =
- _tensor_builder->at(cell_to_output_weights_index).get(); // optional
- lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(),
- cell_to_output_weights_alloc->handle());
- }
- if (has_projection_param)
- {
- auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? _tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
- lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle);
- }
-
- fn->configure(
- input_alloc->handle(), input_to_forget_weights_alloc->handle(),
- input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(),
- recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(),
- recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(),
- cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(),
- cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(),
- output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(),
- lstm_params, act_info, cell_clip, projection_clip);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+ ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::CLComparison>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
(arm_compute::ComparisonOperation)comparison_type);
auto acl_fn = asAclClFunction(std::move(fn));
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_alloc = _tensor_builder->at(input_index);
- orig_inputs_acl_tensor_shapes.emplace_back(input_alloc->info()->tensor_shape());
- assert(input_rank == input_alloc->num_dimensions());
- if (input_rank != input_alloc->info()->num_dimensions())
+ const auto &input_tensor = _tensor_builder->at(input_index);
+ orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
+ assert(input_rank == input_tensor->num_dimensions());
+ if (input_rank != input_tensor->info()->num_dimensions())
{
// This means that the higher dimensions are 1 and dim_correction has been applied to the input tensor
- input_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
auto l = std::make_unique<::arm_compute::CLPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
auto l = std::make_unique<::arm_compute::CLPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
{
auto l = std::make_unique<::arm_compute::CLCopy>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclClFunction(std::move(fn));
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::CLActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLScale>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto weights_alloc = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
+ auto weights_tensor = _tensor_builder->at(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
- copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle());
+ copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclClFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::CLRNNLayerEx>(
+ auto fn = std::make_unique<::arm_compute::CLRNNLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(),
- bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(),
- act_info);
+ fn->configure(input_tensor->handle(), weights_tensor->handle(),
+ recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclClFunction(std::move(fn));
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLFloor>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
- auto paddings_alloc = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto paddings_tensor = _tensor_builder->at(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
std::unique_ptr<::arm_compute::IFunction> fn;
auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
- l->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
- ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
fn = std::move(l);
auto acl_fn = asAclClFunction(std::move(fn));
auto block_size = node.param().block_size;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLSpaceToDepth>();
+ auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
auto acl_fn = asAclClFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)};
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
-
- uint32_t kw = node.param().kw;
- uint32_t kh = node.param().kh;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh},
- ::onert::backend::acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
- fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle());
+ fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hits_alloc = _tensor_builder->at(hits_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hits_tensor = _tensor_builder->at(hits_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto keys_alloc = _tensor_builder->at(keys_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto keys_tensor = _tensor_builder->at(keys_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
- fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(),
- output_alloc->handle(), hits_alloc->handle());
+ fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto alpha_alloc = _tensor_builder->at(alpha_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto alpha_tensor = _tensor_builder->at(alpha_index).get();
- auto fn = std::make_unique<::arm_compute::CLPReLU>();
+ auto fn = std::make_unique<::arm_compute::CLPReluLayer>();
- fn->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
(node.param().padding.type == ir::PaddingType::VALID));
auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
ker_shape.W, ker_shape.H);
-
uint32_t invalid_horizontal = 0;
uint32_t invalid_vertical = 0;
if (node.param().padding.type == ir::PaddingType::VALID)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
+ tconv_info, invalid_horizontal, invalid_vertical);
auto acl_fn = asAclClFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::CLBitwiseOr>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLBitwiseNot>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto k = node.param().k;
- auto values_alloc = _tensor_builder->at(outputValues_index).get();
- auto indices_alloc = _tensor_builder->at(outputIndices_index).get();
- auto input_alloc = _tensor_builder->at(inputData_index).get();
+ auto values_tensor = _tensor_builder->at(outputValues_index).get();
+ auto indices_tensor = _tensor_builder->at(outputIndices_index).get();
+ auto input_tensor = _tensor_builder->at(inputData_index).get();
auto fn = std::make_unique<::arm_compute::CLTopKV2>();
- fn->configure(input_alloc->handle(), k, values_alloc->handle(), indices_alloc->handle());
+ fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto indices_alloc = _tensor_builder->at(indices_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto indices_tensor = _tensor_builder->at(indices_index).get();
// NOTE The frontend layout and backend layout must be the same for this operation.
// If not the same, we would have to add a stage(?) to permute the output tensor. This operation
// also depends on the layout of a model. For example, if a model in NHWC has this operation with
// output rank == 4, indices rank == 2 and axis == 2, it should work on the W and C axes, but W
// and C are not adjacent in NCHW. So the backend in NCHW cannot handle this case.
- const auto backend_layout = ofm_alloc->layout();
+ const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
- assert(backend_layout == ifm_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
+ assert(backend_layout == ifm_tensor->layout());
+ assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
auto fn = std::make_unique<::arm_compute::CLGatherEx>();
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
- assert(n == ifm_alloc->num_dimensions());
+ assert(n == ifm_tensor->num_dimensions());
size_t k = _ctx.at(indices_index).shape().rank();
- assert(k == indices_alloc->num_dimensions());
+ assert(k == indices_tensor->num_dimensions());
// Disable applied dim_correction
- const auto orig_ifm_acl_tensor_shape = ifm_alloc->info()->tensor_shape();
- if (n != ifm_alloc->info()->num_dimensions())
+ const auto orig_ifm_acl_tensor_shape = ifm_tensor->info()->tensor_shape();
+ if (n != ifm_tensor->info()->num_dimensions())
{
// This means that the higher dimensions are 1 and dim_correction has been applied to the ifm tensor
const auto ifm = _ctx.at(ifm_index);
- ifm_alloc->info()->set_tensor_shape(
+ ifm_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
}
- const auto orig_indice_acl_tensor_shape = indices_alloc->info()->tensor_shape();
- if (k != indices_alloc->info()->num_dimensions())
+ const auto orig_indice_acl_tensor_shape = indices_tensor->info()->tensor_shape();
+ if (k != indices_tensor->info()->num_dimensions())
{
// This means that the higher dimensions are 1 and dim_correction has been applied to the indices tensor
const auto indices = _ctx.at(indices_index);
- indices_alloc->info()->set_tensor_shape(
+ indices_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
+ fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
- ifm_alloc->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
- indices_alloc->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
+ ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
+ indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
auto acl_fn = asAclClFunction(std::move(fn));
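// Note on the shape juggling above (annotation, not part of the patch): set_tensor_shape(..., false)
// temporarily turns off ACL's dim_correction (collapsing of trailing size-1 dimensions, which lowers
// num_dimensions()), so that CLGatherEx is configured with the frontend rank; the original ACL shapes
// are restored right after configure().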
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLNeg>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
assert((ifm_shape.rank() - 1) == ofm_shape.rank());
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
auto frontend_layout = _current_op_seq_layout;
- auto backend_layout = ifm_alloc->layout();
+ auto backend_layout = ifm_tensor->layout();
int axis_value = node.param().axis;
if (axis_value < 0)
auto acl_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLArgOperation>();
+ auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), {acl_axis},
- ::arm_compute::ArgOperation::MAX);
+ fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
+ ::arm_compute::ReductionOperation::ARG_IDX_MAX);
auto acl_fn = asAclClFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<::arm_compute::CLCast>();
+ auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), arm_compute::SubDataType::NONE);
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
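// The switch from CLCast to CLDequantizationLayer above makes Dequantize follow the standard affine
// rule real = scale * (quantized - zero_point). A minimal reference sketch of that rule in plain C++
// (illustrative only, not the ACL implementation):
//
//   std::vector<float> dequantize_ref(const std::vector<uint8_t> &q, float scale, int32_t zero_point)
//   {
//     std::vector<float> out(q.size());
//     for (std::size_t i = 0; i < q.size(); ++i)
//       out[i] = scale * (static_cast<int32_t>(q[i]) - zero_point);
//     return out;
//   }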
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclClFunction(std::move(fn));
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<::arm_compute::CLDepthToSpace>();
+ auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), block_size);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
auto acl_fn = asAclClFunction(std::move(fn));
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- std::vector<arm_compute::ICLTensor *> output_allocs;
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ std::vector<arm_compute::ICLTensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
auto axis = node.param().axis;
if (axis < 0)
axis += ifm_rank;
auto fn = std::make_unique<::arm_compute::CLSplit>();
- fn->configure(ifm_alloc->handle(), output_allocs, axis);
+ fn->configure(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclClFunction(std::move(fn));
}
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_alloc = _tensor_builder->at(output_index);
- orig_outputs_acl_tensor_shapes.emplace_back(output_alloc->info()->tensor_shape());
- assert(output_rank == output_alloc->num_dimensions());
- if (output_rank != output_alloc->info()->num_dimensions())
+ const auto &output_tensor = _tensor_builder->at(output_index);
+ orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
+ assert(output_rank == output_tensor->num_dimensions());
+ if (output_rank != output_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- output_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
// Disable applied dim_correction
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_alloc = _tensor_builder->at(input_index);
- assert(input_rank == input_alloc->num_dimensions());
- if (input_rank != input_alloc->info()->num_dimensions())
+ const auto &input_tensor = _tensor_builder->at(input_index);
+ assert(input_rank == input_tensor->num_dimensions());
+ if (input_rank != input_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- input_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(input_index).shape(), frontend_layout, backend_layout, false));
}
const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLElementwiseMin>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLElementwiseMax>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), ::arm_compute::ConvertPolicy::SATURATE,
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
0);
auto acl_fn = asAclClFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), ::arm_compute::ConvertPolicy::SATURATE,
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
0);
auto acl_fn = asAclClFunction(std::move(fn));
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_
+#define __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_
+
+#include <exec/IFunction.h>
+#include <ir/Operands.h>
+
+#include <ir/operation/LSTM.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
+ typename T_TensorBuilder>
+std::unique_ptr<exec::IFunction>
+kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder)
+{
+ // TODO Support dynamic rnn
+ // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
+ const auto scratch_buffer_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ const auto output_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ const auto cell_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+ const auto input_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ const auto input_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ const auto input_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+ const auto recurrent_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ const auto recurrent_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ const auto recurrent_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto cell_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+ const auto cell_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+ const auto cell_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+ const auto input_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ const auto forget_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
+ const auto output_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ const auto projection_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+ const auto projection_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+ const auto output_state_in_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
+ const auto cell_threshold = node.param().cell_threshold;
+ const auto projection_threshold = node.param().projection_threshold;
+
+ bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0;
+ bool has_recurrent_to_input_weights =
+ operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 &&
+ operands.at(projection_weights_index).shape().dim(1) != 0;
+ bool has_projection_bias = operands.at(projection_bias_index).shape().dim(0);
+
+ // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
+ // true: no CIFG
+ // false: CIFG
+ // NOTE The cell_to_input_weights does not exist in non-peephole mode, even for a regular (non-CIFG) LSTM.
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+
+ // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
+ // But the cell_to_input_weights does not exist in CIFG mode, even with peephole.
+ // true: peephole
+ // false: no peephole
+ bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
+
+ // NOTE Although the projection weights have data, the projection bias may not.
+ bool has_projection_param = has_projection_weights;
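+ // To summarize the flags above: has_cifg_param == false selects the CIFG path, has_peephole_param
+ // enables the peephole connections, and has_projection_param enables the projection layer (with an
+ // optional projection bias).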
+
+ const auto activation = node.param().activation;
+ const auto cell_clip = cell_threshold;
+ const auto projection_clip = projection_threshold;
+ assert(cell_clip >= 0.f && projection_clip >= 0.f);
+
+ auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get();
+ auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get();
+ auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get();
+ auto output_tensor = tensor_builder->at(output_index).get();
+
+ auto input_tensor = tensor_builder->at(input_index).get();
+
+ auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get();
+ auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get();
+ auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get();
+ auto recurrent_to_forget_weights_tensor =
+ tensor_builder->at(recurrent_to_forget_weights_index).get();
+ auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get();
+ auto recurrent_to_output_weights_tensor =
+ tensor_builder->at(recurrent_to_output_weights_index).get();
+
+ auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get();
+ auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get();
+ auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get();
+ auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get();
+ auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get();
+
+ auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
+
+ auto fn = std::make_unique<T_ACLLayer>();
+
+ ::arm_compute::LSTMParams<T_Tensor> lstm_params{};
+ if (has_cifg_param)
+ {
+ auto input_to_input_weights_tensor =
+ tensor_builder->at(input_to_input_weights_index).get(); // optional
+ auto recurrent_to_input_weights_tensor =
+ tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
+ auto cell_to_input_weights_handle =
+ has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle()
+ : nullptr; // optional (non-cifg && peephole)
+ auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional
+ lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
+ recurrent_to_input_weights_tensor->handle(),
+ cell_to_input_weights_handle, input_gate_bias_tensor->handle());
+ }
+ if (has_peephole_param)
+ {
+ auto cell_to_forget_weights_tensor =
+ tensor_builder->at(cell_to_forget_weights_index).get(); // optional
+ auto cell_to_output_weights_tensor =
+ tensor_builder->at(cell_to_output_weights_index).get(); // optional
+ lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
+ cell_to_output_weights_tensor->handle());
+ }
+ if (has_projection_param)
+ {
+ auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional
+ auto projection_bias_handle = has_projection_bias
+ ? tensor_builder->at(projection_bias_index).get()->handle()
+ : nullptr; // optional
+ lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
+ }
+
+ fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+ input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+ recurrent_to_forget_weights_tensor->handle(),
+ recurrent_to_cell_weights_tensor->handle(),
+ recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+ cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
+ output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
+ scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
+ cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info,
+ cell_clip, projection_clip);
+
+ return std::make_unique<T_FunctionWrapper>(std::move(fn));
+}
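+// (Illustrative, hypothetical instantiation: a backend's KernelGenerator is expected to call this
+//  with its own wrapper/tensor/layer types, e.g. roughly
+//  kernelGenLSTM<AclFunction, ::arm_compute::ITensor, ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder);
+//  the exact types are chosen by the caller and are not fixed by this header.)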
+
+template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
+ typename T_TensorBuilder>
+std::unique_ptr<exec::IFunction>
+kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands &operands,
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+
+ const auto input_rank = operands.at(input_index).shape().rank();
+
+ const auto output_size =
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+ UNUSED_RELEASE(output_size);
+ assert(operands.at(bias_index).shape().dim(0) == output_size);
+ assert(operands.at(weight_index).shape().dim(0) == output_size);
+ const auto batch_size =
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
+ const auto input_size =
+ operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
+
+ // Check for reshaping input's shape into rank-2
+ bool needs_reshape = false;
+ ir::Shape reshape(2);
+ if (input_rank == 3 || input_rank == 4)
+ {
+ const auto &ifm_shape = operands.at(input_index).shape();
+ auto feature_size = 1;
+ for (int i = 0; i < ifm_shape.rank(); ++i)
+ {
+ feature_size *= ifm_shape.dim(i);
+ }
+
+ UNUSED_RELEASE(feature_size);
+ assert(feature_size == batch_size * input_size);
+
+ // for reshaping
+ needs_reshape = true;
+ reshape.dim(0) = batch_size; /* H */
+ reshape.dim(1) = input_size; /* W */
+ }
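+ // Illustrative numbers (hypothetical shapes): an input of {2, 4, 8} with a weight of {16, 32} gives
+ // batch_size = 2, input_size = 32 and feature_size = 2 * 4 * 8 = 64 == batch_size * input_size,
+ // so the input is reshaped to {2, 32} before the fully connected kernel runs.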
+
+ auto output_tensor = tensor_builder->at(output_index).get();
+ const auto input_tensor = tensor_builder->at(input_index).get();
+ const auto weight_tensor = tensor_builder->at(weight_index).get();
+ const auto bias_tensor = tensor_builder->at(bias_index).get();
+ const auto frontend_layout = layout;
+ const auto acl_layout = output_tensor->handle()->info()->data_layout();
+
+ auto fn =
+ std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+ typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
+ if (operands.at(weight_index).isConstant())
+ {
+ kernel_type = T_ACLLayer::KernelType::PREPROCESSED_WEIGHTS;
+ assert(operands.at(weight_index).data());
+ }
+
+ fn->configure(
+ input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(),
+ output_tensor->handle(), needs_reshape,
+ ::onert::backend::acl_common::asTensorShape(
+ reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
+ kernel_type);
+
+ return std::make_unique<T_FunctionWrapper>(std::move(fn));
+}
+
+template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder>
+std::unique_ptr<::arm_compute::IFunction>
+kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout,
+ ::arm_compute::PoolingType pooling_type)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(0)};
+
+ const auto ofm_shape = operands.at(ofm_index).shape().asFeature(layout);
+ const auto ifm_shape = operands.at(ifm_index).shape().asFeature(layout);
+
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+
+ VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
+ VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
+ VERBOSE(Pool2DParam) << "OFM_H: " << ofm_shape.H << std::endl;
+ VERBOSE(Pool2DParam) << "OFM_W: " << ofm_shape.W << std::endl;
+ VERBOSE(Pool2DParam) << "KER_H: " << kh << std::endl;
+ VERBOSE(Pool2DParam) << "KER_W: " << kw << std::endl;
+ VERBOSE(Pool2DParam) << "STRIDE_H: " << stride.vertical << std::endl;
+ VERBOSE(Pool2DParam) << "STRIDE_W: " << stride.horizontal << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(T): " << padding.top << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(B): " << padding.bottom << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
+
+ auto ofm_tensor = tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = tensor_builder->at(ifm_index).get();
+
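+ // exclude_padding == true below keeps padded elements out of the averaging denominator for AVG
+ // pooling; it has no effect on MAX pooling, so the same info serves every pooling_type.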
+ ::arm_compute::PoolingLayerInfo info{
+ pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
+ acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
+
+ auto fn = std::make_unique<T_ACLLayer>();
+
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info);
+
+ return fn;
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_
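// The helpers above are shared by the CL and NEON kernel generators; later hunks in this patch call
// them in place of the per-backend bodies. A sketch of the call the acl_neon MaxPool2D visitor below
// ends up making (wrapper and layer types belong to that backend):
//
//   auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
//       node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
//   _return_fn = std::make_unique<exec::FunctionSequence>(
//       asAclFunction(std::move(raw_fn)),
//       ActivationBuilder::generate(activation, ofm_tensor->handle()));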
#include "exec/NopFunction.h"
#include "util/logging.h"
#include "util/Utils.h"
+#include "AclKernelGen.h"
namespace onert
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto frontend_layout = _current_op_seq_layout;
- auto backend_layout = ifm_alloc->layout();
+ auto backend_layout = ifm_tensor->layout();
int axis_value = node.param().axis;
if (axis_value < 0)
auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>();
- fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(),
+ fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
arm_compute::ReductionOperation::ARG_IDX_MAX);
auto acl_fn = asAclFunction(std::move(fn));
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
assert(_ctx.at(block_size_index).data());
auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>();
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::NECast>();
+ std::unique_ptr<::arm_compute::IFunction> fn;
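+ // Use a plain NECopy when the input and output data types already match (the cast degenerates to a
+ // copy); otherwise fall back to NECast with a saturating conversion policy.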
+ if (ifm_tensor->data_type() == ofm_tensor->data_type())
+ {
+ auto l = std::make_unique<::arm_compute::NECopy>();
- auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8
- ? arm_compute::SubDataType::BOOL
- : arm_compute::SubDataType::NONE;
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+
+ fn = std::move(l);
+ }
+ else
+ {
+ auto l = std::make_unique<::arm_compute::NECast>();
+
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+
+ fn = std::move(l);
+ }
auto acl_fn = asAclFunction(std::move(fn));
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
- conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
+ ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayerEx>();
+ auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), block_size);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
auto acl_fn = asAclFunction(std::move(fn));
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, multiplier, act_info);
_return_fn = asAclFunction(std::move(fn));
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
- ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_alloc = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_builder->at(ofm_index).get();
std::vector<::arm_compute::ITensor *> input_tensors;
for (const auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
if (input_indexes.size() < 2)
{
auto l = std::make_unique<::arm_compute::NECopy>();
- l->configure(input_tensors.at(0), output_alloc->handle());
+ l->configure(input_tensors.at(0), output_tensor->handle());
fn = std::move(l);
}
else
auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_alloc->handle(), fixed_axis);
+ l->configure(input_tensors, output_tensor->handle(), fixed_axis);
fn = std::move(l);
}
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();
- fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle());
+ fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NEFloor>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
- using ir::operation::FullyConnected;
-
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
- const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
-
- const auto input_rank = _ctx.at(input_index).shape().rank();
-
- const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- UNUSED_RELEASE(output_size);
- assert(_ctx.at(bias_index).shape().dim(0) == output_size);
- assert(_ctx.at(weight_index).shape().dim(0) == output_size);
- const auto batch_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2);
- const auto input_size =
- _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1);
-
- // Check for reshaping input's shape into rank-2
- bool needs_reshape = false;
- ir::Shape reshape(2);
- if (input_rank == 3 || input_rank == 4)
- {
- const auto &ifm_shape = _ctx.at(input_index).shape();
- auto feature_size = 1;
- for (int i = 0; i < ifm_shape.rank(); ++i)
- {
- feature_size *= ifm_shape.dim(i);
- }
-
- UNUSED_RELEASE(feature_size);
- assert(feature_size == batch_size * input_size);
-
- // for reshaping
- needs_reshape = true;
- reshape.dim(0) = batch_size; /* H */
- reshape.dim(1) = input_size; /* W */
- }
-
+ auto output_tensor = _tensor_builder->at(output_index).get();
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- const auto input_alloc = _tensor_builder->at(input_index).get();
- const auto weight_alloc = _tensor_builder->at(weight_index).get();
- const auto bias_alloc = _tensor_builder->at(bias_index).get();
- const auto frontend_layout = _current_op_seq_layout;
- const auto acl_layout = output_alloc->handle()->info()->data_layout();
-
- auto fn = std::make_unique<arm_compute::NEFullyConnectedReshapingLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- arm_compute::NEFullyConnectedReshapingLayer::KernelType kernel_type =
- arm_compute::NEFullyConnectedReshapingLayer::KernelType::GENERAL;
- if (_ctx.at(weight_index).isConstant())
- {
- kernel_type = arm_compute::NEFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS;
- assert(_ctx.at(weight_index).data());
- }
-
- fn->configure(
- input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
- needs_reshape,
- ::onert::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
-
+ auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
+ ::arm_compute::NEFullyConnectedReshapingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)),
- ActivationBuilder::generate(activation, output_alloc->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hits_alloc = _tensor_builder->at(hits_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hits_tensor = _tensor_builder->at(hits_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto keys_alloc = _tensor_builder->at(keys_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto keys_tensor = _tensor_builder->at(keys_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();
- fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(),
- output_alloc->handle(), hits_alloc->handle());
+ fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
// Converting in reverse order
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto indices_alloc = _tensor_builder->at(indices_index).get();
- const auto backend_layout = ofm_alloc->layout();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto indices_tensor = _tensor_builder->at(indices_index).get();
+ const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
// NOTE The frontend layout and backend layout must be the same for this operation.
// This matters when there is a Gather op with output rank == 4, indices rank == 2 and axis == 2 in
// a model. For example, if a model in NHWC has this operation as output rank == 4, indices
// rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
// and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
- assert(backend_layout == ifm_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
+ assert(backend_layout == ifm_tensor->layout());
+ assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
auto fn = std::make_unique<::arm_compute::NEGatherEx>();
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
- assert(n == ifm_alloc->num_dimensions());
+ assert(n == ifm_tensor->num_dimensions());
size_t k = _ctx.at(indices_index).shape().rank();
- assert(k == indices_alloc->num_dimensions());
+ assert(k == indices_tensor->num_dimensions());
// Disable applied dim_correction
- if (n != ifm_alloc->info()->num_dimensions())
+ if (n != ifm_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
const auto ifm = _ctx.at(ifm_index);
- ifm_alloc->info()->set_tensor_shape(
+ ifm_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
}
- if (k != indices_alloc->info()->num_dimensions())
+ if (k != indices_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and indices tensor is applied dim_correction
const auto indices = _ctx.at(indices_index);
- indices_alloc->info()->set_tensor_shape(
+ indices_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
+ fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// acl_neon doesn't revert disabling applied dim_correction because acl_neon's kernels would
// use arm_compute::TensorInfo::offset_element_in_bytes()
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto gamma_alloc = _tensor_builder->at(gamma_index).get();
- auto beta_alloc = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto gamma_tensor = _tensor_builder->at(gamma_index).get();
+ auto beta_tensor = _tensor_builder->at(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(),
- beta_alloc->handle(), epsilon);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
+ beta_tensor->handle(), epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
- uint32_t kw = node.param().kw;
- uint32_t kh = node.param().kh;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh},
- ::onert::backend::acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::NELogicalAnd>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEBitwiseNot>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::NELogicalOr>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
// instead of 'INF', and then the result of this op will be errors due to the 'NaN'.
auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
- // TODO Support dynamic rnn
- // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- const auto cell_threshold = node.param().cell_threshold;
- const auto projection_threshold = node.param().projection_threshold;
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
- // true: no CIFG
- // false: CIFG
- // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
- // But the cell_to_input_weights does not exist in regular CIFG although peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE Although the projection weights has data the projection bias may not have data.
- bool has_projection_param = has_projection_weights;
-
- const auto activation = node.param().activation;
- const auto cell_clip = cell_threshold;
- const auto projection_clip = projection_threshold;
- assert(cell_clip >= 0.f && projection_clip >= 0.f);
-
- auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get();
- auto output_alloc = _tensor_builder->at(output_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get();
- auto recurrent_to_forget_weights_alloc =
- _tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get();
- auto recurrent_to_output_weights_alloc =
- _tensor_builder->at(recurrent_to_output_weights_index).get();
-
- auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get();
-
- auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = std::make_unique<::arm_compute::NELSTMLayer>();
-
- ::arm_compute::LSTMParams<::arm_compute::ITensor> lstm_params{};
- if (has_cifg_param)
- {
- auto input_to_input_weights_alloc =
- _tensor_builder->at(input_to_input_weights_index).get(); // optional
- auto recurrent_to_input_weights_alloc =
- _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
- auto cell_to_input_weights_handle =
- has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle()
- : nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional
- lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(),
- recurrent_to_input_weights_alloc->handle(),
- cell_to_input_weights_handle, input_gate_bias_alloc->handle());
- }
- if (has_peephole_param)
- {
- auto cell_to_forget_weights_alloc =
- _tensor_builder->at(cell_to_forget_weights_index).get(); // optional
- auto cell_to_output_weights_alloc =
- _tensor_builder->at(cell_to_output_weights_index).get(); // optional
- lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(),
- cell_to_output_weights_alloc->handle());
- }
- if (has_projection_param)
- {
- auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? _tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
- lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle);
- }
-
- fn->configure(
- input_alloc->handle(), input_to_forget_weights_alloc->handle(),
- input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(),
- recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(),
- recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(),
- cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(),
- cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(),
- output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(),
- lstm_params, act_info, cell_clip, projection_clip);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor,
+ ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder);
}
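+// NOTE The LSTM body removed above is factored into a shared helper in acl_common. The
+// signature below is only a rough sketch inferred from the call site; the template and
+// parameter names are assumptions, not the actual header:
+//
+//   template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
+//             typename T_TensorBuilder>
+//   std::unique_ptr<exec::IFunction>
+//   kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
+//                 const std::shared_ptr<T_TensorBuilder> &tensor_builder);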
void KernelGenerator::visit(const ir::operation::Mul &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>();
  // For scale 1.0, only RoundingPolicy::TO_ZERO is allowed
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Neg &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NENegLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_alloc = _tensor_builder->at(input_index);
- assert(input_rank == input_alloc->num_dimensions());
- if (input_rank != input_alloc->info()->num_dimensions())
+ const auto &input_tensor = _tensor_builder->at(input_index);
+ assert(input_rank == input_tensor->num_dimensions());
+ if (input_rank != input_tensor->info()->num_dimensions())
{
      // This means the high dimension's value is 1 and dim_correction has been applied to the ifm tensor
- input_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
auto l = std::make_unique<::arm_compute::NEPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
auto l = std::make_unique<::arm_compute::NEPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
{
auto l = std::make_unique<::arm_compute::NECopy>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
}
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto alpha_alloc = _tensor_builder->at(alpha_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto alpha_tensor = _tensor_builder->at(alpha_index).get();
std::unique_ptr<::arm_compute::IFunction> fn;
- auto l = std::make_unique<::arm_compute::NEPReLU>();
+ auto l = std::make_unique<::arm_compute::NEPReluLayer>();
- l->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = input_alloc->layout();
+ const auto backend_layout = input_tensor->layout();
const auto reduce_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
const auto reduce_type = node.param().reduce_type;
std::unique_ptr<::arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
- // NOTE NEReduceMean has a bug that does not support NHWC layout
- // NEReduceMean intermediate tensors are always NCHW layout
- auto l = std::make_unique<::arm_compute::NEReduceMeanEx>();
+ auto l = std::make_unique<::arm_compute::NEReduceMean>();
- l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle());
+ l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
fn = std::move(l);
}
{
auto l = std::make_unique<::arm_compute::NEReduceSum>();
- l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle());
+ l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
fn = std::move(l);
}
{
auto l = std::make_unique<::arm_compute::NEReduceOperation>();
- l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle(),
+ l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
acl_common::convertReduceType(reduce_type));
fn = std::move(l);
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::NEActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
  // NOTE This operation must not change the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
UNUSED_RELEASE(frontend_layout);
auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NEScale>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto weights_alloc = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
+ auto weights_tensor = _tensor_builder->at(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = std::make_unique<::arm_compute::NECopy>();
- copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle());
+ copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::NERNNLayerEx>(
+ auto fn = std::make_unique<::arm_compute::NERNNLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(),
- bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(),
- act_info);
+ fn->configure(input_tensor->handle(), weights_tensor->handle(),
+ recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
(void)dims;
(void)ndim;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
_return_fn = std::move(acl_fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::NEActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
const auto beta = node.param().beta;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = input_tensor->layout();
+
+ // Disable applied dim_correction
+ const size_t input_rank = _ctx.at(input_index).shape().rank();
+ if (input_rank != input_tensor->info()->num_dimensions())
+ {
+ // This means the high dimension's value is 1 and dim_correction has been applied to the input tensor
+ const auto input = _ctx.at(input_index);
+ input_tensor->info()->set_tensor_shape(
+ acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
+ }
auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), output_alloc->handle(), beta);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
auto acl_fn = asAclFunction(std::move(fn));
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
- auto paddings_alloc = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto paddings_tensor = _tensor_builder->at(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
- // NESpaceToBatchLayer has a bug that padding's values are 0 even when zero point of QASYMM8 is
- // not 0.
- auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayerEx>();
+ auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
- ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
auto block_size = node.param().block_size;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayerEx>();
+ auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
auto acl_fn = asAclFunction(std::move(fn));
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- std::vector<arm_compute::ITensor *> output_allocs;
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ std::vector<arm_compute::ITensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
auto axis = node.param().axis;
if (axis < 0)
axis += ifm_rank;
auto fn = std::make_unique<::arm_compute::NESplit>();
- fn->configure(ifm_alloc->handle(), output_allocs, axis);
+ fn->configure(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
auto fn = std::make_unique<::arm_compute::NESlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
auto acl_fn = asAclFunction(std::move(fn));
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
strides_set, begin_mask, end_mask, shrink_axis_mask);
auto acl_fn = asAclFunction(std::move(fn));
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
+ tconv_info, invalid_horizontal, invalid_vertical);
auto acl_fn = asAclFunction(std::move(fn));
const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
const auto &perm{node.param().perm};
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- const auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
const auto rank = _ctx.at(ifm_idx).shape().rank();
std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_alloc->num_dimensions() <= 2 && ofm_alloc->num_dimensions() <= 2)
+ if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
{
auto l = std::make_unique<::arm_compute::NETranspose>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
}
{
auto l = std::make_unique<::arm_compute::NEPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
fn = std::move(l);
}
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_alloc = _tensor_builder->at(output_index);
- orig_outputs_acl_tensor_shapes.emplace_back(output_alloc->info()->tensor_shape());
- assert(output_rank == output_alloc->num_dimensions());
- if (output_rank != output_alloc->info()->num_dimensions())
+ const auto &output_tensor = _tensor_builder->at(output_index);
+ orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
+ assert(output_rank == output_tensor->num_dimensions());
+ if (output_rank != output_tensor->info()->num_dimensions())
{
      // This means the high dimension's value is 1 and dim_correction has been applied to the output tensor
- output_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Div &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Exp &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEExpLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto comparison_type = node.param().comparison_type;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
(arm_compute::ComparisonOperation)comparison_type);
auto acl_fn = asAclFunction(std::move(fn));
const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
#ifndef __ONERT_BACKEND_CPU_BACKEND_H__
#define __ONERT_BACKEND_CPU_BACKEND_H__
+#include "BackendContext.h"
#include "Config.h"
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
+ bool) const override
{
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto tb = std::make_shared<TensorBuilder>();
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+ context->external_context());
context->tensor_register = nullptr;
context->optimizer = nullptr;
return context;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
+ std::shared_ptr<ITensorRegister> tensor_register = nullptr,
+ std::shared_ptr<IOptimizer> optimizer = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
+ kernel_gen, tensor_register, optimizer),
+ _external_context(new ExternalContext)
+ {
+ }
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ // NOTE The ruy context owns a thread pool, so creating multiple ruy contexts
+ // duplicates the thread pool as well
+ // TODO Create a single ruy context per session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
set(LIB_ONERT_BACKEND_CPU onert_backend_cpu)
+nnfw_find_package(Ruy REQUIRED)
+
file(GLOB_RECURSE SOURCES "*.cc")
add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES})
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE onert_core)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ruy)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} INTERFACE ruy_instrumentation)
set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu)
*/
#include "ConstantInitializer.h"
+#include "Tensor.h"
namespace onert
{
// DO NOTHING
}
+void ConstantInitializer::registerDefaultInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ registerExternalInitializer(index, obj);
+}
+
+void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ // Applies to CONSTANT operands only
+ // TODO Add a check for whether the tensor has already been allocated
+ if (!obj.isConstant())
+ return;
+
+ _init_map[index] = [](const onert::ir::Operand &model_obj, onert::backend::ITensor &itensor) {
+ auto data = model_obj.shareData();
+ assert(data && data->base());
+ ExternalTensor &tensor = dynamic_cast<ExternalTensor &>(itensor);
+ tensor.setData(data);
+ };
+}
+
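+// NOTE The initializer registered above is what makes these constants "external": instead of
+// copying weights into backend memory, the ExternalTensor is pointed at the model's shared
+// ir::Data. A rough sketch of what setData is assumed to do (member names are hypothetical;
+// see Tensor.h for the actual definition):
+//
+//   void ExternalTensor::setData(const std::shared_ptr<ir::Data> &data)
+//   {
+//     _data = data;                                  // keep the model data alive
+//     _buffer = const_cast<uint8_t *>(data->base()); // reuse it directly, no copy
+//   }
+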
void ConstantInitializer::visit(const ir::operation::Conv2D &node)
{
const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
const auto &kernel_obj = _operands.at(kernel_index);
- registerCopyInitializer(kernel_index, kernel_obj);
+ registerExternalInitializer(kernel_index, kernel_obj);
const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
+ registerExternalInitializer(bias_index, bias_obj);
}
void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
{
const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
const auto &kernel_obj = _operands.at(kernel_index);
- registerCopyInitializer(kernel_index, kernel_obj);
+ registerExternalInitializer(kernel_index, kernel_obj);
const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
+ registerExternalInitializer(bias_index, bias_obj);
}
void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
{
const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
const auto &weight_obj = _operands.at(weight_index);
- registerCopyInitializer(weight_index, weight_obj);
+ registerExternalInitializer(weight_index, weight_obj);
const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
if (!bias_index.undefined())
{
const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
+ registerExternalInitializer(bias_index, bias_obj);
}
}
const std::shared_ptr<TensorBuilder> &tensor_builder);
public:
+ void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
+
+ // TODO: For now only the cpu backend supports constant tensors that use external data.
+ // If other backends add support (which would likely require making
+ // ExternalTensor abstract, e.g. as an IExternal interface),
+ // this could become part of the IConstantInitializer interface.
+ void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
+
+public:
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
void visit(const ir::operation::FullyConnected &) override;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
+
+#include <backend/IExternalContext.h>
+#include <util/ConfigSource.h>
+#include <ruy/context.h>
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 1;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class ExternalContext : public IExternalContext
+{
+public:
+ ExternalContext() : _ruy_context(new ruy::Context)
+ {
+ setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+#ifdef USE_RUY_GEMV
+ _ruy_context->cache_policy = ruy::kCacheLHSOnNarrowMul;
+#endif
+ }
+
+ void setMaxNumThreads(int max_num_threads)
+ {
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->max_num_threads = target_num_threads;
+ }
+
+ ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ const std::unique_ptr<ruy::Context> _ruy_context;
+};
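+
+// Hypothetical standalone usage of ExternalContext, sketched only from the calls declared
+// above (the constructor reads the RUY_THREADS config; no additional API is assumed):
+//
+//   auto ctx = std::make_shared<ExternalContext>(); // thread count taken from RUY_THREADS
+//   ctx->setMaxNumThreads(4);                       // or override it explicitly
+//   ruy::Context *ruy_ctx = ctx->ruy_context();     // one context => one shared thread pool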
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
#include "ops/AddLayer.h"
#include "ops/ArgMinMaxLayer.h"
#include "ops/AvgPoolLayer.h"
+#include "ops/BatchToSpaceNDLayer.h"
#include "ops/CastLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/RangeLayer.h"
#include "ops/ReduceLayer.h"
#include "ops/ReLULayer.h"
+#include "ops/ReLU6Layer.h"
#include "ops/ReshapeLayer.h"
+#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
#include "ops/RoundLayer.h"
#include "ops/RsqrtLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
#include "ops/SpaceToBatchNDLayer.h"
+#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
+#include "ops/SplitVLayer.h"
#include "ops/SubLayer.h"
#include "ops/TanhLayer.h"
#include "ops/TileLayer.h"
#include "ops/ZerosLikeLayer.h"
#include "ops/SquaredDiffLayer.h"
#include "ops/LogicalOrLayer.h"
+#include "ops/L2NormLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
+#include "ops/QuantizeLayer.h"
+#include "ops/StatelessRandomUniformLayer.h"
#include <backend/Backend.h>
#include <backend/IConfig.h>
KernelGenerator::KernelGenerator(
const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder)
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
: _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
+ _external_context(external_context)
{
// DO NOTHING
}
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
- auto ker_alloc = _tensor_builder->portableAt(ker_index).get();
- auto bias_alloc = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
+ auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
+ auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
const auto stride = node.param().stride;
const auto activation = node.param().activation;
if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
{
- fn->configure(ifm_alloc, ker_alloc, bias_alloc, param_padding.type, param_padding.param.left,
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
- stride.horizontal, stride.vertical, activation, ofm_alloc);
+ stride.horizontal, stride.vertical, activation, ofm_tensor);
_return_fn = std::move(fn);
return;
const auto padding =
ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
- fn->configure(ifm_alloc, ker_alloc, bias_alloc, param_padding.type, padding.left, padding.right,
- padding.top, padding.bottom, stride.horizontal, stride.vertical, activation,
- ofm_alloc);
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ activation, ofm_tensor);
_return_fn = std::move(fn);
}
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
- auto ker_alloc = _tensor_builder->portableAt(ker_index).get();
- auto bias_alloc = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
+ auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
+ auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
- fn->configure(ifm_alloc, ker_alloc, bias_alloc, padding.left, padding.right, padding.top,
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
- ofm_alloc);
+ ofm_tensor);
_return_fn = std::move(fn);
}
ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::MaxPoolLayer>();
- fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+ fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
_return_fn = std::move(fn);
}
ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::AvgPoolLayer>();
- fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+ fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
_return_fn = std::move(fn);
}
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
auto fn = std::make_unique<ops::ConcatLayer>();
- fn->configure(input_tensors, axis, output_alloc);
+ fn->configure(input_tensors, axis, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
+ const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto block_size_tensor = _tensor_builder->portableAt(block_size_index).get();
+
+ auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
+
+ IPortableTensor *crops_tensor = nullptr;
+ const auto NNApiInputs = 2;
+
+ if (node.getInputs().size() != NNApiInputs)
+ {
+ const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
+ crops_tensor = _tensor_builder->portableAt(crops_data_index).get();
+ }
+
+ fn->configure(input_tensor, output_tensor, block_size_tensor, crops_tensor);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto value_alloc = _tensor_builder->portableAt(value_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto value_tensor = _tensor_builder->portableAt(value_index).get();
auto fn = std::make_unique<ops::FillLayer>();
- fn->configure(input_alloc, value_alloc, output_alloc);
+ fn->configure(input_tensor, value_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto weight_alloc = _tensor_builder->portableAt(weight_index).get();
- auto bias_alloc =
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto weight_tensor = _tensor_builder->portableAt(weight_index).get();
+ auto bias_tensor =
bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get();
auto fn = std::make_unique<ops::FullyConnectedLayer>();
- fn->configure(input_alloc, weight_alloc, bias_alloc, activation, output_alloc);
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor,
+ _external_context);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
// optional 2nd input
- IPortableTensor *shape_alloc = nullptr;
+ IPortableTensor *shape_tensor = nullptr;
if (node.getInputs().size() == 2)
{
const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- shape_alloc = _tensor_builder->portableAt(shape_index).get();
+ shape_tensor = _tensor_builder->portableAt(shape_index).get();
}
auto fn = std::make_unique<ops::ReshapeLayer>();
- fn->configure(input_alloc, shape_alloc, output_alloc);
+ fn->configure(input_tensor, shape_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
// Squeeze can share same kernel with reshape
auto fn = std::make_unique<ops::ReshapeLayer>();
- fn->configure(input_alloc, nullptr, output_alloc);
+ fn->configure(input_tensor, nullptr, output_tensor);
_return_fn = std::move(fn);
}
const auto beta = node.param().beta;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::SoftMaxLayer>();
- fn->configure(input_alloc, beta, output_alloc);
+ fn->configure(input_tensor, beta, output_tensor);
_return_fn = std::move(fn);
}
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::AddLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto comparison_type = node.param().comparison_type;
auto fn = std::make_unique<ops::CompareLayer>();
- fn->configure(lhs_alloc, rhs_alloc, comparison_type, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto indices_alloc = _tensor_builder->portableAt(indices_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
UNUSED_RELEASE(backend_layout);
// NOTE The frontend layout and backend layout must be the same for this operation.
// a model. For example, if a model in NHWC has this operation as output rank == 4, indices
// rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
// and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
- assert(backend_layout == input_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
+ assert(backend_layout == input_tensor->layout());
+ assert(backend_layout == indices_tensor->layout());
const auto &input_shape = _ctx.at(input_index).shape();
UNUSED_RELEASE(input_shape);
assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
auto fn = std::make_unique<ops::GatherLayer>();
- fn->configure(input_alloc, indices_alloc, output_alloc, axis_value);
+ fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
_return_fn = std::move(fn);
}
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::SubLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::MulLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
const auto axis = node.param().axis;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto indices_alloc = _tensor_builder->portableAt(indices_index).get();
- auto depth_alloc = _tensor_builder->portableAt(depth_index).get();
- auto onvalue_alloc = _tensor_builder->portableAt(onvalue_index).get();
- auto offvalue_alloc = _tensor_builder->portableAt(offvalue_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
+ auto depth_tensor = _tensor_builder->portableAt(depth_index).get();
+ auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get();
+ auto offvalue_tensor = _tensor_builder->portableAt(offvalue_index).get();
- assert(indices_alloc->data_type() == OperandType::INT32);
- assert(axis <= static_cast<int>(indices_alloc->num_dimensions()));
+ assert(indices_tensor->data_type() == OperandType::INT32);
+ assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
auto fn = std::make_unique<ops::OneHotLayer>();
- fn->configure(indices_alloc, depth_alloc, onvalue_alloc, offvalue_alloc, output_alloc, axis);
+ fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);
_return_fn = std::move(fn);
}
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::DivLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
- std::vector<const IPortableTensor *> input_allocs;
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_allocs.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
const auto equation = node.param().equation;
auto fn = std::make_unique<ops::EinsumLayer>();
- fn->configure(input_allocs, equation, output_alloc);
+ fn->configure(input_tensors, equation, output_tensor);
_return_fn = std::move(fn);
}
{
auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
std::vector<custom::TypeInfo> &types,
- std::vector<std::shared_ptr<IPortableTensor>> &allocs) {
+ std::vector<std::shared_ptr<IPortableTensor>> &tensors) {
for (auto &idx : opSeq)
{
const auto &operand = _ctx.at(idx);
// TODO make sure using `_current_op_seq_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
- auto in_alloc = _tensor_builder->portableAt(idx);
- allocs.emplace_back(in_alloc);
+ auto in_tensor = _tensor_builder->portableAt(idx);
+ tensors.emplace_back(in_tensor);
}
};
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::ExpLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto axis_alloc = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_alloc, axis_alloc, output_alloc);
+ fn->configure(input_tensor, axis_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::LogisticLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::TanhLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
assert(-rank <= axis && axis < rank);
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
auto fn = std::make_unique<ops::PackLayer>();
- fn->configure(input_tensors, axis, output_alloc);
+ fn->configure(input_tensors, axis, output_tensor);
_return_fn = std::move(fn);
}
assert(rank == 0 || (-rank <= axis && axis < rank));
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
std::vector<IPortableTensor *> output_tensors;
for (auto &output_idx : node.getOutputs())
uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
- fn->configure(input_alloc, axis_resolved, node.param().num, output_tensors);
+ fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);
_return_fn = std::move(fn);
}
auto fn = std::make_unique<ops::PadLayer>();
- fn->configure(input, output, pad_base, pad_rank);
+ bool isPadV2 = node.getInputs().size() == 3;
+ const void *value = nullptr;
+
+ if (isPadV2)
+ {
+ const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
+ value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
+ }
+ fn->configure(input, output, pad_base, pad_rank, value);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::MaxLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::MinLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::CastLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::TransposeLayer>();
- fn->configure(input_alloc, output_alloc, node.param().perm);
+ fn->configure(input_tensor, output_tensor, node.param().perm);
_return_fn = std::move(fn);
}
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
const auto keep_dims = node.param().keep_dims;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto axes_alloc = _tensor_builder->portableAt(axes_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto axes_tensor = _tensor_builder->portableAt(axes_index).get();
if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
auto fn = std::make_unique<ops::MeanLayer>();
- fn->configure(input_alloc, axes_alloc, output_alloc, keep_dims);
+ fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
_return_fn = std::move(fn);
}
auto fn = std::make_unique<ops::ReduceLayer>();
const auto reduce_type = convertReduceType(node.param().reduce_type);
- fn->configure(input_alloc, axes_alloc, output_alloc, reduce_type, keep_dims);
+ fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::ReLULayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+
+ auto fn = std::make_unique<ops::ReLU6Layer>();
+
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto condition_alloc = _tensor_builder->portableAt(condition_index).get();
- auto true_alloc = _tensor_builder->portableAt(true_index).get();
- auto false_alloc = _tensor_builder->portableAt(false_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto condition_tensor = _tensor_builder->portableAt(condition_index).get();
+ auto true_tensor = _tensor_builder->portableAt(true_index).get();
+ auto false_tensor = _tensor_builder->portableAt(false_index).get();
auto fn = std::make_unique<ops::SelectLayer>();
- fn->configure(condition_alloc, true_alloc, false_alloc, output_alloc);
+ fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto begins_alloc = _tensor_builder->portableAt(begins_index).get();
- auto sizes_alloc = _tensor_builder->portableAt(sizes_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto begins_tensor = _tensor_builder->portableAt(begins_index).get();
+ auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get();
auto fn = std::make_unique<ops::SliceLayer>();
- fn->configure(input_alloc, begins_alloc, sizes_alloc, output_alloc);
+ fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto starts_alloc = _tensor_builder->portableAt(starts_index).get();
- auto ends_alloc = _tensor_builder->portableAt(ends_index).get();
- auto strides_alloc = _tensor_builder->portableAt(strides_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto starts_tensor = _tensor_builder->portableAt(starts_index).get();
+ auto ends_tensor = _tensor_builder->portableAt(ends_index).get();
+ auto strides_tensor = _tensor_builder->portableAt(strides_index).get();
auto begin_mask = node.param().begin_mask;
auto end_mask = node.param().end_mask;
auto fn = std::make_unique<ops::StridedSliceLayer>();
- fn->configure(input_alloc, starts_alloc, ends_alloc, strides_alloc, output_alloc, begin_mask,
+ fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
end_mask, shrink_axis_mask);
_return_fn = std::move(fn);
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::AbsLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::SinLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::CosLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::RsqrtLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::ShapeLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
+
+ auto output_height = node.param().height_out;
+ auto output_width = node.param().width_out;
+ auto align_corners = node.param().align_corners;
+ auto half_pixel_centers = node.param().half_pixel_centers;
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+
+ auto fn = std::make_unique<ops::ResizeBilinearLayer>();
+
+ fn->configure(input_tensor, output_tensor, output_height, output_width, align_corners,
+ half_pixel_centers);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto axis_alloc = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
auto fn = std::make_unique<ops::ReverseLayer>();
- fn->configure(input_alloc, axis_alloc, output_alloc);
+ fn->configure(input_tensor, axis_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::NegLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto axis = node.param().axis;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
- fn->configure(input_alloc, output_alloc, axis, /* is_arg_max */ true);
+ fn->configure(input_tensor, output_tensor, axis, /* is_arg_max */ true);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::PowLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ir::Activation::NONE, output_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::LogLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::RoundLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::LogicalNotLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(0)};
const auto rhs_index{node.getInputs().at(1)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::LogicalOrLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ZerosLike &node)
+void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)};
+ const auto input_index{node.getInputs().at(0)};
auto output_alloc = _tensor_builder->portableAt(output_index).get();
auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto fn = std::make_unique<ops::ZerosLikeLayer>();
+ auto fn = std::make_unique<ops::L2NormLayer>();
fn->configure(input_alloc, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ZerosLike &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)};
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+
+ auto fn = std::make_unique<ops::ZerosLikeLayer>();
+
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto start_alloc = _tensor_builder->portableAt(start_index).get();
- auto limit_alloc = _tensor_builder->portableAt(limit_index).get();
- auto delta_alloc = _tensor_builder->portableAt(delta_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto start_tensor = _tensor_builder->portableAt(start_index).get();
+ auto limit_tensor = _tensor_builder->portableAt(limit_index).get();
+ auto delta_tensor = _tensor_builder->portableAt(delta_index).get();
auto fn = std::make_unique<ops::RangeLayer>();
- fn->configure(start_alloc, limit_alloc, delta_alloc, output_alloc);
+ fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::SqDiffLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto multiples_alloc = _tensor_builder->portableAt(multiples_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get();
auto fn = std::make_unique<ops::TileLayer>();
- fn->configure(input_alloc, multiples_alloc, output_alloc);
+ fn->configure(input_tensor, multiples_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto num_lower_alloc = _tensor_builder->portableAt(num_lower_index).get();
- auto num_upper_alloc = _tensor_builder->portableAt(num_upper_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get();
+ auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get();
auto fn = std::make_unique<ops::MatrixBandPartLayer>();
- fn->configure(input_alloc, num_lower_alloc, num_upper_alloc, output_alloc);
+ fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
const auto adj_x = node.param().adj_x;
const auto adj_y = node.param().adj_y;
auto fn = std::make_unique<ops::BatchMatMulLayer>();
- fn->configure(lhs_alloc, rhs_alloc, adj_x, adj_y, output_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto shape_alloc = _tensor_builder->portableAt(shape_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto shape_tensor = _tensor_builder->portableAt(shape_index).get();
auto fn = std::make_unique<ops::BroadcastToLayer>();
- fn->configure(input_alloc, shape_alloc, output_alloc);
+ fn->configure(input_tensor, shape_tensor, output_tensor);
_return_fn = std::move(fn);
}
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
- std::vector<const IPortableTensor *> input_allocs;
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_allocs.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
const auto epsilon = node.param().epsilon;
const auto is_training = node.param().is_training;
auto fn = std::make_unique<ops::FusedBatchNormLayer>();
- fn->configure(input_allocs, epsilon, is_training, data_format, output_alloc);
+ fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);
_return_fn = std::move(fn);
}
const auto beta = node.param().beta;
const auto axis = node.param().axis;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::LogSoftMaxLayer>();
- fn->configure(input_alloc, beta, axis, output_alloc);
+ fn->configure(input_tensor, beta, axis, output_tensor);
_return_fn = std::move(fn);
}
const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto block_shape_alloc = _tensor_builder->portableAt(block_shape_index).get();
- auto padding_alloc = _tensor_builder->portableAt(padding_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get();
+ auto padding_tensor = _tensor_builder->portableAt(padding_index).get();
auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
- fn->configure(input_alloc, block_shape_alloc, padding_alloc, output_alloc);
+ fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Quantize &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+
+ auto fn = std::make_unique<ops::QuantizeLayer>();
+
+ fn->configure(input_tensor, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ auto block_size = node.param().block_size;
+
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+
+ auto fn = std::make_unique<ops::SpaceToDepthLayer>();
+
+ fn->configure(input_tensor, block_size, output_tensor);
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
+ const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto shape_tensor = _tensor_builder->portableAt(shape_index).get();
+ auto seed_tensor = _tensor_builder->portableAt(seed_index).get();
+
+ auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
+
+ fn->configure(shape_tensor, seed_tensor, output_tensor);
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::SplitV &node)
+{
+ const auto num_splits = node.param().num_splits;
+ assert(num_splits == static_cast<int>(node.getOutputs().size()));
+
+ const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
+ const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
+ const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
+
+ auto in_tensor = _tensor_builder->portableAt(input_idx).get();
+ auto in_size_splits = _tensor_builder->portableAt(size_splits).get();
+ auto in_split_dim = _tensor_builder->portableAt(split_dim).get();
+
+ std::vector<IPortableTensor *> out_tensors;
+ for (auto &output_idx : node.getOutputs())
+ out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+
+ auto fn = std::make_unique<ops::SplitVLayer>();
+
+ fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);
_return_fn = std::move(fn);
}
#ifndef __ONERT_BACKEND_CPU_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_CPU_KERNEL_GENERATOR_H__
+#include "ExternalContext.h"
#include "TensorBuilder.h"
#include "Tensor.h"
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<custom::IKernelBuilder> &kernel_builder);
+ const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context);
using IKernelGenerator::visit;
void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::Reduce &) override;
void visit(const ir::operation::ReLU &) override;
+ void visit(const ir::operation::ReLU6 &) override;
void visit(const ir::operation::Select &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::Sin &) override;
void visit(const ir::operation::RSQRT &) override;
void visit(const ir::operation::Shape &) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &) override;
void visit(const ir::operation::Neg &) override;
void visit(const ir::operation::ArgMax &) override;
void visit(const ir::operation::SquaredDifference &) override;
void visit(const ir::operation::Tile &) override;
void visit(const ir::operation::LogicalOr &) override;
+ void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
void visit(const ir::operation::MatrixBandPart &) override;
void visit(const ir::operation::BatchMatMul &) override;
+ void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BroadcastTo &) override;
void visit(const ir::operation::FusedBatchNorm &) override;
void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
+ void visit(const ir::operation::Quantize &) override;
+ void visit(const ir::operation::SpaceToDepth &) override;
+ void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::SplitV &) override;
private:
const ir::Operands &_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
ir::Layout _current_op_seq_layout;
+ const std::shared_ptr<ExternalContext> _external_context;
};
} // namespace cpu
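
For reference, a minimal sketch (not part of the patch) of how the extended constructor declared above would typically be initialized: the new external_context argument is simply stored in the _external_context member and later forwarded to kernels such as FullyConnectedLayer::configure(), as seen earlier in this diff. Member names that are not visible in the header excerpt (notably the operations context) are assumptions.

  KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
                                   const ir::Operations &operations_ctx,
                                   const std::shared_ptr<TensorBuilder> &tensor_builder,
                                   const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
                                   const std::shared_ptr<ExternalContext> &external_context)
      // NOTE _operations_ctx is an assumed member name; the rest appear in the header above.
      : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
        _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
        _external_context(external_context)
  {
    // Only stores the references; kernels are created lazily in the visit() overloads.
  }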
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®,
+ cpu_common::DynamicTensorManager *dynamic_tensor_manager)
+ : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg},
+ _dynamic_tensor_manager{dynamic_tensor_manager}
+{
+ // DO NOTHING
+}
+
+void StaticTensorManager::allocateNonconsts(void)
+{
+ _nonconst_mgr->allocate();
+
+ for (auto &pair : _tensors->native_tensors())
+ {
+ const auto &ind = pair.first;
+ auto tensor = pair.second;
+ if (!_as_constants[ind] && !tensor->is_dynamic())
+ {
+ auto *buffer = _nonconst_mgr->getBuffer(ind);
+ tensor->setBuffer(buffer);
+
+ VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
+ << "): " << static_cast<void *>(buffer) << std::endl;
+ }
+ }
+}
+
+void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
+
+void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
+ const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
+ bool as_const)
+{
+ assert(!_tensors->getITensor(ind));
+ if (as_const)
+ {
+ auto tensor = std::make_shared<ExternalTensor>(tensor_info, backend_layout);
+ _tensors->setNativeTensor(ind, tensor);
+ }
+ else
+ {
+ auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
+ _tensors->setNativeTensor(ind, tensor);
+ }
+ _as_constants[ind] = as_const;
+}
+
+void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+ assert(_tensors->getITensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getITensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->claimPlan(ind, size);
+}
+
+void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
+{
+ assert(_tensors->getITensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getITensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->releasePlan(ind);
+}
+
+void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
+{
+ for (const auto &it : _tensors->native_tensors())
+ fn(it.first);
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
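
As context (not in the patch): a minimal usage sketch of the lifecycle implied by the manager above, based only on the methods it defines. The tensor registry, dynamic tensor manager, operand index/info, and plan size are assumed to be provided by the surrounding backend.

  #include "StaticTensorManager.h"

  using namespace onert;

  void planAndAllocate(const std::shared_ptr<backend::cpu_common::TensorRegistry> &reg,
                       backend::cpu_common::DynamicTensorManager *dyn_mgr,
                       const ir::OperandIndex &index, const ir::OperandInfo &info, uint32_t size)
  {
    backend::cpu::StaticTensorManager mgr{reg, dyn_mgr};

    // Non-constant operands become Tensor; constants would become ExternalTensor instead.
    mgr.buildTensor(index, info, ir::Layout::NHWC, /* as_const */ false);

    // Lifetime planning: claim at the operand's first use, release at its last use.
    mgr.claimPlan(index, size);
    mgr.releasePlan(index);

    // Allocate the planned block and attach buffers to all static non-constant tensors.
    mgr.allocateNonconsts();

    // ... run kernels ...

    mgr.deallocateNonconsts();
  }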
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
+
+#include "backend/IStaticTensorManager.h"
+#include "backend/cpu_common/DynamicTensorManager.h"
+#include "backend/cpu_common/MemoryManager.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/ITensorManager.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandInfo.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class StaticTensorManager : public backend::IStaticTensorManager
+{
+public:
+ StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®,
+ cpu_common::DynamicTensorManager *dynamic_tensor_manager);
+ virtual ~StaticTensorManager() = default;
+
+ void allocateNonconsts(void);
+ void deallocateNonconsts(void);
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout, bool as_const);
+
+ void claimPlan(const ir::OperandIndex &ind, uint32_t size);
+ void releasePlan(const ir::OperandIndex &ind);
+
+ void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
+
+private:
+ std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr;
+ const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
+ ir::OperandIndexMap<bool> _as_constants;
+ cpu_common::DynamicTensorManager *_dynamic_tensor_manager;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
using Tensor = cpu_common::Tensor;
-// Tensor which has data from external. To support this, assume below things
-// no padding, always NHWC layout, constant tensor and not dynamic
+/**
+ * @brief Class that uses data from external memory that is not managed by a backend,
+ * instead of allocating and copying the data. ExternalTensor's data pointer points to
+ * memory that is already allocated elsewhere, such as an mmapped area.
+ * This means that ExternalTensor can accept any kind of ir::Data.
+ * To support this, the following are assumed: no padding, always NHWC layout,
+ * a constant tensor, and not dynamic.
+ */
class ExternalTensor : public Tensor
{
public:
ExternalTensor() = delete;
public:
- ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout) : Tensor(info, layout)
+ ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
+ : Tensor(info, layout, nullptr)
{
assert(_layout == ir::Layout::NHWC);
assert(_info.isConstant());
}
public:
+ /**
+ * @brief Set Data shared from an external source so that this ExternalTensor is not
+ * allocated by the CPU backend
+ * @param[in] data data of Operand to be set
+ */
void setData(const std::shared_ptr<ir::Data> data)
{
assert(data != nullptr);
TensorBuilder::TensorBuilder()
: _tensor_reg{new cpu_common::TensorRegistry()},
- _static_tensor_mgr{new cpu_common::StaticTensorManager(_tensor_reg)},
- _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}
+ _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
{
/* empty */
}
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
-void TensorBuilder::prepare(void)
-{
- _static_tensor_mgr->allocateConsts();
- _static_tensor_mgr->allocateNonconsts();
-}
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
void TensorBuilder::allocate()
{
return _tensor_reg->getPortableTensor(ind);
}
-bool TensorBuilder::setExternalTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor)
+bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor)
{
- return _tensor_reg->setExternalTensor(ind, tensor);
+ return _tensor_reg->setMigrantTensor(ind, tensor);
}
void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
+std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
{
- return _tensor_reg->getManagedTensor(ind);
+ return _tensor_reg->getNativeTensor(ind);
}
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
#define __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
#include <backend/cpu_common/DynamicTensorManager.h>
-#include <backend/cpu_common/StaticTensorManager.h>
#include <backend/cpu_common/TensorRegistry.h>
-#include <backend/cpu_common/Tensor.h>
#include <backend/ITensorBuilder.h>
#include <ir/OperandIndexMap.h>
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
#include <unordered_map>
namespace onert
* If not, program will crash with assert or exception.
* @return shared_ptr<Tensor>
*/
- std::shared_ptr<cpu_common::Tensor> at(const ir::OperandIndex &ind);
+ std::shared_ptr<Tensor> at(const ir::OperandIndex &ind);
std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind);
- bool setExternalTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override;
+ bool setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor) override;
std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchToSpaceNDLayer.h"
+
+#include <cker/operation/BatchToSpaceND.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+BatchToSpaceNDLayer::BatchToSpaceNDLayer()
+ : _input(nullptr), _output(nullptr), _block_shape(nullptr), _crops(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T> void BatchToSpaceNDLayer::batchToSpaceNDGeneric()
+{
+ const int32_t NNapiCrops[]{0, 0, 0, 0};
+ const int32_t *_crops_buffer;
+
+ if (_crops == nullptr)
+ {
+ _crops_buffer = NNapiCrops;
+ }
+ else
+ {
+ _crops_buffer = reinterpret_cast<const int32_t *>(_crops->buffer());
+ }
+ nnfw::cker::BatchToSpaceND<T>(
+ getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
+ reinterpret_cast<const int32_t *>(_block_shape->buffer()), _crops_buffer,
+ getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+}
+
+void BatchToSpaceNDLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ IPortableTensor *block_shape, IPortableTensor *crops)
+{
+ _output = output;
+ _input = input;
+ _block_shape = block_shape;
+ _crops = crops;
+}
+
+void BatchToSpaceNDLayer::run()
+{
+ if (_output->data_type() == OperandType::FLOAT32)
+ {
+ batchToSpaceNDGeneric<float>();
+ }
+ else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ batchToSpaceNDGeneric<uint8_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"NYI"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
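
A short usage sketch (not in the patch), mirroring the kernel generator change earlier in this diff: when the model carries only the two NNAPI inputs, crops is passed as nullptr and the layer falls back to the all-zero crops handled in batchToSpaceNDGeneric(). The input, block_shape, and output tensors are assumed to be prepared by the backend.

  #include "BatchToSpaceNDLayer.h"

  void makeBatchToSpaceND(const onert::backend::IPortableTensor *input,
                          onert::backend::IPortableTensor *block_shape,
                          onert::backend::IPortableTensor *output)
  {
    onert::backend::cpu::ops::BatchToSpaceNDLayer layer;
    layer.configure(input, output, block_shape, /* crops */ nullptr);
    layer.run();
  }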
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class BatchToSpaceNDLayer : public ::onert::exec::IFunction
+{
+public:
+ BatchToSpaceNDLayer();
+
+public:
+ template <typename T> void batchToSpaceNDGeneric();
+
+ void configure(const IPortableTensor *input, IPortableTensor *output,
+ IPortableTensor *block_shape, IPortableTensor *crops);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+ IPortableTensor *_block_shape;
+ IPortableTensor *_crops;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__
#include "OperationUtils.h"
+#include <assert.h>
#include <cker/operation/Comparison.h>
using namespace nnfw::cker;
namespace onert
using OpType = onert::ir::operation::Comparison::ComparisonType;
using namespace onert::backend::cpu;
+// Assumes that these enum values are in exactly this order
+static_assert(static_cast<int>(OpType::Equal) == 0, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::NotEqual) == 1, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::Greater) == 2, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::GreaterEqual) == 3, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::Less) == 4, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::LessEqual) == 5, "An OpType value has changed!");
+
template <typename T>
void compareQuant8(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
OpType op_type)
¶ms.input2_shift);
params.is_broadcast = !HaveSameShapes(lhs, rhs);
- if (params.is_broadcast)
- {
- switch (op_type)
- {
- case OpType::Equal:
- Broadcast4DSlowEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- Broadcast4DSlowNotEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- Broadcast4DSlowGreaterWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- Broadcast4DSlowGreaterEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- Broadcast4DSlowLessWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- Broadcast4DSlowLessEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- else // if (requires_broadcast == false)
- {
- switch (op_type)
- {
- case OpType::Equal:
- EqualWithScaling(params, getExtendedTensorShape(lhs),
- reinterpret_cast<const T *>(lhs->buffer()), getExtendedTensorShape(rhs),
- reinterpret_cast<const T *>(rhs->buffer()), getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- NotEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- GreaterWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- GreaterEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- LessWithScaling(params, getExtendedTensorShape(lhs),
- reinterpret_cast<const T *>(lhs->buffer()), getExtendedTensorShape(rhs),
- reinterpret_cast<const T *>(rhs->buffer()), getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- LessEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- return;
+ using CompareFunction =
+ void (*)(ComparisonParams & params, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape,
+ bool *output_data);
+
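+ // Dispatch tables indexed by OpType, relying on the enum ordering checked by the static_asserts above.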
+ static const CompareFunction broadcast_fns[] = {
+ Broadcast4DSlowEqualWithScaling, Broadcast4DSlowNotEqualWithScaling,
+ Broadcast4DSlowGreaterWithScaling, Broadcast4DSlowGreaterEqualWithScaling,
+ Broadcast4DSlowLessWithScaling, Broadcast4DSlowLessEqualWithScaling,
+ };
+ static const CompareFunction non_broadcast_fns[] = {
+ EqualWithScaling, NotEqualWithScaling, GreaterWithScaling,
+ GreaterEqualWithScaling, LessWithScaling, LessEqualWithScaling,
+ };
+
+ static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
+ "Sizes of broadcast_fns and non_broadcast_fns must match!");
+
+ auto index = static_cast<int>(op_type);
+ if (index < 0 || index >= static_cast<int>(sizeof(broadcast_fns) / sizeof(broadcast_fns[0])))
+ throw std::runtime_error{"Invalid OpType for CompareLayer"};
+
+ CompareFunction fn = (params.is_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
+
+ fn(params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
}
template <typename T>
{
bool requires_broadcast = !HaveSameShapes(lhs, rhs);
- if (requires_broadcast)
- {
- switch (op_type)
- {
- case OpType::Equal:
- Broadcast4DSlowEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- Broadcast4DSlowNotEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- Broadcast4DSlowGreater(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- Broadcast4DSlowGreaterEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- Broadcast4DSlowLess(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- Broadcast4DSlowLessEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- else // if (requires_broadcast == false)
- {
- switch (op_type)
- {
- case OpType::Equal:
- EqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- NotEqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- GreaterNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- GreaterEqualNoScaling(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- LessNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- LessEqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- return;
+ using CompareFunction =
+ void (*)(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape,
+ const T *input2_data, const Shape &output_shape, bool *output_data);
+
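+ // Same dispatch scheme as the quantized path: tables indexed by OpType, ordering checked by the static_asserts above.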
+ static const CompareFunction broadcast_fns[] = {
+ Broadcast4DSlowEqual, Broadcast4DSlowNotEqual, Broadcast4DSlowGreater,
+ Broadcast4DSlowGreaterEqual, Broadcast4DSlowLess, Broadcast4DSlowLessEqual,
+ };
+ static const CompareFunction non_broadcast_fns[] = {
+ EqualNoScaling, NotEqualNoScaling, GreaterNoScaling,
+ GreaterEqualNoScaling, LessNoScaling, LessEqualNoScaling,
+ };
+
+ static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
+ "Sizes of broadcast_fns and non_broadcast_fns must match!");
+
+ auto index = static_cast<int>(op_type);
+ if (index < 0 || index >= static_cast<int>(sizeof(broadcast_fns) / sizeof(broadcast_fns[0])))
+ throw std::runtime_error{"Invalid OpType for CompareLayer"};
+
+ CompareFunction fn = (requires_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
+
+ fn(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
}
+
} // namespace
CompareLayer::CompareLayer()
#include "../Tensor.h"
#include <cker/operation/FullyConnected.h>
+#include <cker/TensorUtils.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
FullyConnectedLayer::FullyConnectedLayer()
: _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
_activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
- _is_hybrid(false)
+ _external_context(nullptr), _is_hybrid(false)
{
// DO NOTHING
}
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena);
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
+ _external_context->ruy_context());
#else
nnfw::cker::FullyConnectedHybrid(
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
(_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
: reinterpret_cast<const int8_t *>(_weights->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena);
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
+ _external_context->ruy_context());
-// TODO Enable calling decrease_ref
-#if 0
if (_cached_weights == nullptr || _is_weights_freed)
return;
- auto weight_tensor = dynamic_cast<const Tensor *>(_weights);
- if (weight_tensor)
+ // Reaching here means '_cached_weights' is not nullptr and '_is_weights_freed' is false,
+ // i.e. this weight shape satisfies the condition of the ruy kernel's prepack cache.
+ // Once the weights are freed below, this point is not reached again, except in the case below.
+
+ // If the input is a zero vector, the ruy kernel path is bypassed entirely,
+ // so do not free the weights in that case.
+ const int input_size = getTensorShape(_input).FlatSize();
+ if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
+ return;
+
+ auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
+
+ // This weight tensor could also be a constant tensor of other operations.
+ // Therefore, check its reference state as follows before releasing it.
+ auto tensor = const_cast<Tensor *>(weight_tensor);
+ if (tensor->buffer() == nullptr) // ref is already 0?
{
- auto tensor = const_cast<Tensor *>(weight_tensor);
+ _is_weights_freed = true;
+ return;
+ }
- tensor->decrease_ref();
- if (tensor->buffer() == nullptr) // ref == 0?
- {
- _is_weights_freed = true;
- }
+ tensor->decrease_ref();
+ if (tensor->buffer() == nullptr) // ref == 0?
+ {
+ _is_weights_freed = true;
}
-#endif // if 0
#endif
}
+void FullyConnectedLayer::fullyConnectedSparseWeight()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::cker::FullyConnectedParams op_params;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+ op_params.activation = convertActivationType(_activation);
+
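+ // Sparse weights are described by segment and index arrays for the second dimension (a CSR-like layout).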
+ int w0_size = getTensorShape(_weights).Dims(0);
+ const uint16_t *w1_segments = _weights->w1_segments();
+ const uint16_t *w1_indices = _weights->w1_indices();
+
+ nnfw::cker::FullyConnectedSparseWeight(
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w0_size, w1_segments,
+ w1_indices);
+}
+
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
const IPortableTensor *bias, ir::Activation activation,
- IPortableTensor *output)
+ IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
{
_input = input;
_weights = weights;
_output = output;
_is_hybrid = input->data_type() == OperandType::FLOAT32 &&
weights->data_type() == OperandType::QUANT_INT8_SYMM;
+ _external_context = external_context;
}
void FullyConnectedLayer::run()
{
fullyConnectedHybrid();
}
+ else if (_weights->is_sparse())
+ {
+ fullyConnectedSparseWeight();
+ }
else if (_input->data_type() == OperandType::FLOAT32)
{
fullyConnectedFloat32();
void FullyConnectedLayer::prepare()
{
-#ifdef USE_RUY_GEMV
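+ // If the constant bias is all zeros, drop it so that the kernels skip the bias addition.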
+ if (_bias && _bias->is_constant())
+ {
+ const int bias_size = getTensorShape(_bias).FlatSize();
+ if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ {
+ _bias = nullptr;
+ }
+ }
+
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
// TODO This is workaround
// The only fc hybrid will use ruy kernel
if (_input->data_type() != OperandType::FLOAT32 ||
#define __ONERT_BACKEND_CPU_OPS_FULLYCONNECTEDLAYER_H__
#include <backend/IPortableTensor.h>
+#include "../ExternalContext.h"
#include "OperationUtils.h"
#include <exec/IFunction.h>
void fullyConnectedHybrid();
+ void fullyConnectedSparseWeight();
+
void configure(const IPortableTensor *input, const IPortableTensor *weights,
- const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);
+ const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context);
void run() override;
ir::Activation _activation;
std::unique_ptr<nnfw::cker::FCTempArena> _temp_arena;
+ std::shared_ptr<ExternalContext> _external_context;
+
bool _is_hybrid;
#ifdef USE_RUY_GEMV
uint8_t *_cached_weights = nullptr; // weights to be cached and a key
+ bool _is_weights_freed = false; // whether the cached weights buffer has been freed
#endif
};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "L2NormLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/L2Normalize.h>
+#include <cker/Types.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+void L2NormLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+}
+
+void L2NormLayer::run()
+{
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ nnfw::cker::L2NormalizeFloat32(
+ getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ break;
+
+ case OperandType::QUANT_UINT8_ASYMM:
+ {
+ nnfw::cker::L2NormParams params;
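+ // The quantized kernel assumes an input zero point of 128 (checked below).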
+ assert(_input->data_offset() == 128);
+ params.input_zero_point = _input->data_offset();
+ nnfw::cker::L2NormalizeQuant8(
+ params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ }
+ break;
+
+ default:
+ throw std::runtime_error{"L2Norm: Unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class L2NormLayer : public ::onert::exec::IFunction
+{
+public:
+ L2NormLayer() : _input(nullptr), _output(nullptr)
+ {
+ // Nothing
+ }
+
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__
// NYI
}
-void LogSoftMaxLayer::configure(const Tensor *input, const float beta, const int axis,
- Tensor *output)
+void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis,
+ IPortableTensor *output)
{
_input = input;
_output = output;
void logsoftmaxQuant8();
- void configure(const Tensor *input, const float beta, const int axis, Tensor *output);
+ void configure(const IPortableTensor *input, const float beta, const int axis,
+ IPortableTensor *output);
void run();
private:
- const Tensor *_input;
- Tensor *_output;
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
float _beta;
int _axis;
void *v;
};
+union ConstDataPtr {
+ const uint8_t *u8;
+ const int8_t *i8;
+ const uint32_t *u32;
+ const int32_t *i32;
+ const bool *b;
+ const float *f;
+ const int64_t *i64;
+ const void *v;
+};
+
uint32_t getNumberOfDimensions(const IPortableTensor *tensor);
uint32_t getNumberOfElements(const IPortableTensor *tensor);
// DO NOTHING
}
-void PadLayer::padFloat32()
+template <typename T> void PadLayer::padImpl(const T *constant_value_data)
{
- nnfw::cker::Pad(_padData, _padRank, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()), _constantValueData.f);
+ nnfw::cker::Pad<T>(_padData, _padRank, getTensorShape(_input),
+ reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
+ reinterpret_cast<T *>(_output->buffer()), constant_value_data);
}
-void PadLayer::padQuant8() { throw std::runtime_error("Quantized Pad isn't supported NYI"); }
void PadLayer::configure(const IPortableTensor *input, IPortableTensor *output,
- const int32_t *padData, int32_t padRank, uint8_t *constantValueData)
+ const int32_t *padData, int32_t padRank, const void *constantValueData)
{
_input = input;
_output = output;
memcpy(_padData, padData, sizeof(_padData));
_padRank = padRank;
- _constantValueData.u8 = constantValueData;
+ _constantValueData.v = constantValueData;
}
void PadLayer::run()
{
if (_input->data_type() == OperandType::FLOAT32)
{
- padFloat32();
+ padImpl<float>(_constantValueData.f);
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- padQuant8();
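+ // When no constant pad value is given, pad with the output tensor's zero point.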
+ if (_constantValueData.u8 == nullptr)
+ {
+ uint8_t pad_value = static_cast<uint8_t>(_output->data_offset());
+ padImpl<uint8_t>(&pad_value);
+ }
+ else
+ {
+ padImpl<uint8_t>(_constantValueData.u8);
+ }
}
else
{
PadLayer();
public:
- void padFloat32();
-
- void padQuant8();
+ template <typename T> void padImpl(const T *constant_value_data);
void configure(const IPortableTensor *input, IPortableTensor *output, const int32_t *padData,
- int32_t padRank, uint8_t *constantValueData = nullptr);
+ int32_t padRank, const void *constantValueData = nullptr);
void run() override;
int32_t _padData[8];
int32_t _padRank;
- DataPtr _constantValueData;
+ ConstDataPtr _constantValueData;
};
} // namespace ops
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeLayer.h"
+
+#include <cker/operation/Quantize.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize()
+{
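+ // Affine quantization: q = round(value / scale) + zero_point, clamped to the output type's range.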
+ nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()),
+ _output->data_scale(), _output->data_offset());
+}
+
+void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void QuantizeLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ affineQuantize<float, uint8_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class QuantizeLayer : public ::onert::exec::IFunction
+{
+public:
+ QuantizeLayer();
+
+public:
+ template <typename InputT, typename OutputT> void affineQuantize();
+
+ void configure(const IPortableTensor *input, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU6Layer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/ReLU6.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void ReLU6Layer::relu6Float32()
+{
+ nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()));
+}
+
+void ReLU6Layer::relu6Quant8()
+{
+ // cker quant8 relu is not implemented yet
+ throw std::runtime_error{"NYI"};
+}
+
+void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void ReLU6Layer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ relu6Float32();
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ relu6Quant8();
+ }
+ else
+ {
+ throw std::runtime_error{"ReLU6: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class ReLU6Layer : public ::onert::exec::IFunction
+{
+public:
+ ReLU6Layer();
+
+public:
+ void relu6Float32();
+
+ void relu6Quant8();
+
+ void configure(const IPortableTensor *input, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
throw std::runtime_error{"Reduce(generic): unsupported data type"};
}
}
+
+void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
+ const std::vector<int> &axes, bool keep_dims,
+ nnfw::cker::Reduce &reduce_kernel)
+{
+ const bool same_scale = (input->data_scale() == output->data_scale() &&
+ input->data_offset() == output->data_offset());
+
+ reduce_kernel.prepare(input->num_dimensions(), axes.size());
+
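+ // When the input and output quantization parameters differ, accumulate the sum in int32
+ // and requantize; otherwise fall back to the generic same-scale path below.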
+ if (!same_scale)
+ {
+ std::vector<int32_t> temp_sum(output->getShape().num_elements());
+ bool result = reduce_kernel.QuantizedMeanOrSum<uint8_t, int32_t>(
+ reinterpret_cast<const uint8_t *>(input->buffer()), input->data_offset(),
+ input->data_scale(), getTensorShape(input), reinterpret_cast<uint8_t *>(output->buffer()),
+ output->data_offset(), output->data_scale(), getTensorShape(output), axes, keep_dims,
+ temp_sum.data(), true, [](const int32_t current, const uint8_t in) -> int32_t {
+ const int32_t actual_in = static_cast<int32_t>(in);
+ return current + actual_in;
+ });
+
+ if (!result)
+ {
+ throw std::runtime_error{"Reduce: Fail to run"};
+ }
+
+ return;
+ }
+
+ evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel);
+}
+
} // namespace
ReduceLayer::ReduceLayer()
switch (_reduceType)
{
case ReduceType::kSum:
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ return;
+ }
evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel);
break;
case ReduceType::kProd:
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "OperationUtils.h"
+#include "ResizeBilinearLayer.h"
+#include "cker/operation/ResizeBilinear.h"
+#include <cker/Types.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+ResizeBilinearLayer::ResizeBilinearLayer()
+ : _input(nullptr), _output(nullptr), _output_height(0), _output_width(0), _align_corners(false),
+ _half_pixel_centers(false)
+{
+ // DO NOTHING
+}
+
+void ResizeBilinearLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ int32_t output_height, int32_t output_width, bool align_corners,
+ bool half_pixel_centers)
+{
+ _input = input;
+ _output = output;
+ _output_height = output_height;
+ _output_width = output_width;
+ _align_corners = align_corners;
+ _half_pixel_centers = half_pixel_centers;
+}
+
+void ResizeBilinearLayer::run()
+{
+ nnfw::cker::ResizeBilinearParams params;
+ params.align_corners = _align_corners;
+ params.half_pixel_centers = _half_pixel_centers;
+ params.output_height = _output_height;
+ params.output_width = _output_width;
+
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ nnfw::cker::ResizeBilinear(
+ params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ break;
+
+ case OperandType::QUANT_UINT8_ASYMM:
+ nnfw::cker::ResizeBilinear(
+ params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ break;
+
+ case OperandType::UINT8:
+ case OperandType::BOOL8:
+ case OperandType::FLOAT16:
+ case OperandType::INT32:
+ case OperandType::INT64:
+ case OperandType::QUANT_INT8_SYMM:
+ throw std::runtime_error("ResizeBilinear NYI");
+ break;
+ default:
+ throw std::runtime_error("ResizeBilinear unsupported data type");
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__
+#define __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class ResizeBilinearLayer : public ::onert::exec::IFunction
+{
+public:
+ ResizeBilinearLayer();
+
+public:
+ void configure(const IPortableTensor *input1, IPortableTensor *output, int32_t output_height,
+ int32_t output_width, bool align_corners, bool half_pixel_centers);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+ int32_t _output_height;
+ int32_t _output_width;
+ bool _align_corners;
+ bool _half_pixel_centers;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__
}
}
-void SliceLayer::sliceFloat32()
+template <typename T> void SliceLayer::sliceImpl()
{
const int kMaxDim = nnfw::cker::Shape::kMaxSmallSize;
}
nnfw::cker::Slice(op_params, getExtendedTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SliceLayer::sliceQuant8()
-{
- // cker quant8 slice is not implemented yet
- throw std::runtime_error{"NYI"};
+ reinterpret_cast<const T *>(_input->buffer()),
+ reinterpret_cast<T *>(_output->buffer()));
}
void SliceLayer::configure(const IPortableTensor *input, const IPortableTensor *begin,
{
if (_input->data_type() == OperandType::FLOAT32)
{
- sliceFloat32();
+ sliceImpl<float>();
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- sliceQuant8();
+ sliceImpl<uint8_t>();
}
else
{
void run() override;
private:
- void sliceFloat32();
- void sliceQuant8();
+ template <typename T> void sliceImpl();
template <typename T>
void GetBeginAndSizeVectors(int dimensions, const IPortableTensor *begin,
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SpaceToDepthLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/SpaceToDepth.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+SpaceToDepthLayer::SpaceToDepthLayer() : _input(nullptr), _block_size(0), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T> void SpaceToDepthLayer::spaceToDepth()
+{
+ nnfw::cker::SpaceToDepthParams params;
+ params.block_size = _block_size;
+
+ nnfw::cker::SpaceToDepth(params, getTensorShape(_input),
+ reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
+ reinterpret_cast<T *>(_output->buffer()));
+}
+
+void SpaceToDepthLayer::configure(const IPortableTensor *input, const int32_t block_size,
+ IPortableTensor *output)
+{
+ _input = input;
+ _block_size = block_size;
+ _output = output;
+}
+
+void SpaceToDepthLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ spaceToDepth<float>();
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ spaceToDepth<uint8_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"SpaceToDepth: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_SPACE_TO_DEPTH_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_SPACE_TO_DEPTH_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class SpaceToDepthLayer : public ::onert::exec::IFunction
+{
+public:
+ SpaceToDepthLayer();
+
+ void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ template <typename T> void spaceToDepth();
+
+ const IPortableTensor *_input;
+ int32_t _block_size;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_SPACE_TO_DEPTH_LAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitVLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/SplitV.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+SplitVLayer::SplitVLayer()
+ : _input(nullptr), _size_splits(nullptr), _split_dim(nullptr), _num_splits(0), _outputs()
+{
+ // DO NOTHING
+}
+
+template <typename T> void SplitVLayer::splitV(void)
+{
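+ // The split axis is read at run time from the split_dim input tensor.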
+ nnfw::cker::SplitVParams op_params;
+ op_params.axis = *(reinterpret_cast<const int32_t *>(_split_dim->buffer()));
+ op_params.num_split = _num_splits;
+
+ std::vector<T *> outputPtrs;
+ std::vector<nnfw::cker::Shape> outshape;
+
+ for (const auto output : _outputs)
+ {
+ assert(output->total_size() == sizeOfData(output->data_type(), output->getShape().dims()));
+ outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
+ outshape.emplace_back(getTensorShape(output));
+ }
+
+ assert(_input->total_size() == sizeOfData(_input->data_type(), _input->getShape().dims()));
+ nnfw::cker::SplitV<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
+ outshape, outputPtrs.data());
+}
+
+void SplitVLayer::configure(const IPortableTensor *input, const IPortableTensor *size_splits,
+ const IPortableTensor *split_dim, uint16_t num_splits,
+ std::vector<IPortableTensor *> &outputs)
+{
+ assert(input != nullptr);
+
+ _num_splits = num_splits;
+ _size_splits = size_splits;
+ _input = input;
+ _split_dim = split_dim;
+ _outputs = outputs;
+}
+
+void SplitVLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ splitV<float>();
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ splitV<uint8_t>();
+ }
+ else if (_input->data_type() == OperandType::INT32)
+ {
+ splitV<int32_t>();
+ }
+ else if (_input->data_type() == OperandType::INT64)
+ {
+ splitV<int64_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"SplitV: unsupported input type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class SplitVLayer : public ::onert::exec::IFunction
+{
+public:
+ SplitVLayer();
+
+public:
+ template <typename T> void splitV(void);
+
+ void configure(const IPortableTensor *input, const IPortableTensor *size_splits,
+ const IPortableTensor *split_dim, uint16_t num_splits,
+ std::vector<IPortableTensor *> &outputs);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_size_splits;
+ const IPortableTensor *_split_dim;
+ uint16_t _num_splits;
+ std::vector<IPortableTensor *> _outputs;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StatelessRandomUniformLayer.h"
+
+#include <cker/operation/StatelessRandomUniform.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+StatelessRandomUniformLayer::StatelessRandomUniformLayer()
+ : _shape(nullptr), _seed(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void StatelessRandomUniformLayer::configure(const IPortableTensor *shape,
+ const IPortableTensor *seed, IPortableTensor *output)
+{
+ _shape = shape;
+ _seed = seed;
+ _output = output;
+}
+
+void StatelessRandomUniformLayer::StatelessRandomUniformFloat32()
+{
+ nnfw::cker::StatelessRandomUniform(
+ getTensorShape(_shape), reinterpret_cast<const int *>(_shape->buffer()),
+ getTensorShape(_seed), reinterpret_cast<const int *>(_seed->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void StatelessRandomUniformLayer::run()
+{
+ switch (_output->data_type())
+ {
+ // TODO: Also support INT8 and UINT8 once quantization is applied.
+ case OperandType::FLOAT32:
+ StatelessRandomUniformFloat32();
+ break;
+ default:
+ throw std::runtime_error{"StatelessRandomUniformLayer: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__
+#define __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__
+
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class StatelessRandomUniformLayer : public ::onert::exec::IFunction
+{
+public:
+ StatelessRandomUniformLayer();
+
+public:
+ void configure(const IPortableTensor *shape, const IPortableTensor *seed,
+ IPortableTensor *output);
+
+ void StatelessRandomUniformFloat32();
+
+ void run() override;
+
+private:
+ const IPortableTensor *_shape;
+ const IPortableTensor *_seed;
+
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__
{
}
+ virtual ~BackendContext() = default;
+
void initialize(const std::vector<OperationInfo> &operation_list,
const std::vector<ir::OperandIndex> &operand_list);
void initConsts();
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
+
+namespace onert
+{
+namespace backend
+{
+
+struct IExternalContext
+{
+ virtual ~IExternalContext() = default;
+ virtual void setMaxNumThreads(int) = 0;
+};
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
{
public:
virtual ~IPortableTensor() = default;
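+ // Sparse-weight queries; dense tensors use these defaults (not sparse, no index data).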
+ virtual bool is_sparse() const { return false; }
+ virtual const uint16_t *w1_segments() const { return nullptr; }
+ virtual const uint16_t *w1_indices() const { return nullptr; }
public:
bool has_padding() const final { return false; }
namespace backend
{
+struct IDynamicTensorManager;
+
class ITensor
{
public:
virtual void access(const std::function<void(ITensor &tensor)> &fn) = 0;
/**
+ * @brief Return the dynamic tensor manager
+ *
+ * If dynamic tensors are not supported, it returns @c nullptr .
+ *
+ * @return IDynamicTensorManager* DynamicTensorManager
+ */
+ virtual IDynamicTensorManager *dynamic_tensor_manager() { return nullptr; }
+
+ /**
* @brief Return true if the tensor is constant
*/
virtual bool is_constant() const
virtual std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) = 0;
/**
- * @brief Set the External Tensor object
+ * @brief Set the migrant tensor object
*
* @return true if succeeded
* @return false if failed or unsupported
*/
- virtual bool setExternalTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
+ virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
{
return false;
}
virtual ~ITensorRegistry() = default;
/**
- * @brief Returns pointer of ITensor among managed and external tensors
+ * @brief Returns pointer of ITensor among native and migrant tensors
+ *
+ * A native tensor is a tensor that is managed by this backend.
+ * A migrant tensor is a tensor that is imported from another backend.
+ *
* @note Return tensor cannot be used longer than dynamic tensor manager
*/
virtual std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &) = 0;
/**
- * @brief Returns pointer of ITensor among managed tensors
+ * @brief Returns pointer of ITensor among native tensors
*
- * Unlike @c getITensor , this function only searches from managed tensors
- * @note Return tensor cannot be used longer than dynamic tensor manager
+ * Unlike @c getITensor , this function only searches from native tensors
+ *
+ * @note Returned tensor cannot be used longer than dynamic tensor manager
*/
- virtual std::shared_ptr<ITensor> getManagedITensor(const ir::OperandIndex &) = 0;
+ virtual std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &) = 0;
};
} // namespace backend
std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
{
static_assert(std::is_base_of<ITensor, T_Tensor>::value, "T_Tensor must derive from ITensor.");
- auto external_tensor = _external.find(ind);
- if (external_tensor != _external.end())
+ auto external_tensor = _migrant.find(ind);
+ if (external_tensor != _migrant.end())
return external_tensor->second;
- return getManagedTensor(ind);
+ return getNativeTensor(ind);
}
- std::shared_ptr<ITensor> getManagedITensor(const ir::OperandIndex &ind) override
+ std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
{
- return getManagedTensor(ind);
+ return getNativeTensor(ind);
}
std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind)
{
- auto external_tensor = _external.find(ind);
- if (external_tensor != _external.end())
+ auto external_tensor = _migrant.find(ind);
+ if (external_tensor != _migrant.end())
{
if (external_tensor->second)
return external_tensor->second;
}
- return getManagedTensor(ind);
+ return getNativeTensor(ind);
}
- std::shared_ptr<T_Tensor> getManagedTensor(const ir::OperandIndex &ind)
+ std::shared_ptr<T_Tensor> getNativeTensor(const ir::OperandIndex &ind)
{
- auto tensor = _managed.find(ind);
- if (tensor != _managed.end())
+ auto tensor = _native.find(ind);
+ if (tensor != _native.end())
return tensor->second;
return nullptr;
}
- bool setExternalTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor)
+ bool setMigrantTensor(const ir::OperandIndex &ind, const std::shared_ptr<IPortableTensor> &tensor)
{
// TODO Uncomment this as two tensors for an index is not allowed.
// But now it is temporarily allowed as a workaround. External one hides Managed one.
- // auto itr = _managed.find(ind);
- // if (itr != _managed.end() && itr->second != nullptr && tensor != nullptr)
+ // auto itr = _native.find(ind);
+ // if (itr != _native.end() && itr->second != nullptr && tensor != nullptr)
// throw std::runtime_error{
- // "Tried to set an external tensor but an managed tensor already exists."};
- _external[ind] = tensor;
+ // "Tried to set an migrant tensor but an native tensor already exists."};
+ _migrant[ind] = tensor;
return true;
}
- void setManagedTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor)
+ void setNativeTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor)
{
- auto itr = _external.find(ind);
- if (itr != _external.end() && itr->second != nullptr && tensor != nullptr)
+ auto itr = _migrant.find(ind);
+ if (itr != _migrant.end() && itr->second != nullptr && tensor != nullptr)
throw std::runtime_error{
- "Tried to set a managed tensor but an external tensor already exists."};
- _managed[ind] = tensor;
+ "Tried to set a native tensor but an migrant tensor already exists."};
+ _native[ind] = tensor;
}
- const ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &managed_tensors() { return _managed; }
+ const ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &native_tensors() { return _native; }
- const ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> &external_tensors()
+ const ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> &migrant_tensors()
{
- return _external;
+ return _migrant;
}
private:
- ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> _external;
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _managed;
+ ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> _migrant;
+ ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _native;
};
} // namespace backend
#include "MemoryManager.h"
-#include "backend/ITensorManager.h"
+#include "backend/IStaticTensorManager.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandInfo.h"
#include "TensorRegistry.h"
namespace cpu_common
{
-class StaticTensorManager : public backend::ITensorManager
+class StaticTensorManager : public backend::IStaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> ®);
Tensor() = delete;
public:
- Tensor(const ir::OperandInfo &info, const ir::Layout layout)
- : _info(info), _layout(layout), _buffer(nullptr), _num_references(0), _allocator(nullptr)
+ Tensor(const ir::OperandInfo &info, const ir::Layout layout,
+ IDynamicTensorManager *dynamic_tensor_manager)
+ : _info(info), _layout(layout), _buffer(nullptr), _num_references(0),
+ _dynamic_tensor_manager(dynamic_tensor_manager), _allocator(nullptr)
{
// DO NOTHING
}
public:
// Only one of two method 'setBuffer' must be called once
+
+ /**
+ * @brief Set the Buffer object. This method is called for static and non-const tensor
+ */
void setBuffer(uint8_t *buffer)
{
- assert(_buffer == nullptr && _allocator == nullptr);
+ assert(_buffer == nullptr);
_buffer = buffer;
}
+
+ /**
+ * @brief Set the Buffer object. This method is called for dynamic or const tensor
+ */
void setBuffer(const std::shared_ptr<Allocator> &alloc)
{
- assert(_buffer == nullptr && _allocator == nullptr);
+ assert(_buffer == nullptr);
_allocator = alloc;
+ _buffer = alloc->base();
}
// This works just as setBuffer but it simply overwrite existing Allocator without nullptr check
- void overwriteBuffer(const std::shared_ptr<Allocator> &alloc) { _allocator = alloc; }
+ void overwriteBuffer(const std::shared_ptr<Allocator> &alloc)
+ {
+ _allocator = alloc;
+ _buffer = alloc->base();
+ }
/**
* @brief Mark this tensor does not have memory.
}
public:
- uint8_t *buffer() const override
- {
- if (_allocator != nullptr)
- return _allocator->base();
- else
- return _buffer;
- }
+ uint8_t *buffer() const override { return _buffer; }
/**
* @brief Get dimension by index
*
bool is_constant() const override { return _info.isConstant(); }
bool is_dynamic() const override { return _info.isDynamic(); }
void set_dynamic() override { _info.setDynamic(); }
+ IDynamicTensorManager *dynamic_tensor_manager() override { return _dynamic_tensor_manager; }
+ bool is_sparse() const override { return _info.typeInfo().sparse(); }
+ virtual const uint16_t *w1_segments() const override { return _info.typeInfo().w1_segments(); }
+ virtual const uint16_t *w1_indices() const override { return _info.typeInfo().w1_indices(); }
virtual void increase_ref()
{
assert(is_dynamic() ||
// when not dynamic
- (_buffer != nullptr || _allocator != nullptr));
+ (_buffer != nullptr));
++_num_references;
}
assert(_buffer != nullptr || _allocator != nullptr);
assert(_num_references > 0);
--_num_references;
- // Only constant tensor has allocator pointer
+ // Both constant tensors and dynamic tensors hold an _allocator
if (_num_references == 0)
{
if (_buffer != nullptr)
_buffer = nullptr;
- else
+ if (_allocator != nullptr)
{
_allocator->release();
_allocator = nullptr;
ir::Layout _layout;
uint8_t *_buffer;
int32_t _num_references;
+ IDynamicTensorManager *_dynamic_tensor_manager;
private:
+ /**
+   * @brief Memory allocator for dynamic tensors and constant tensors
+   *        Since maintaining both _allocator and _buffer is confusing,
+   *        this code mainly uses _buffer (not _allocator->base()) as the memory pointer.
+   *        _allocator (a shared_ptr) is kept only to guarantee that _buffer stays valid.
+ */
std::shared_ptr<Allocator> _allocator;
};
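
The buffer/allocator split above can be summarized with a small standalone sketch (illustrative only, not onert code): the tensor always reads through _buffer, and _allocator merely keeps the allocation alive for constant or dynamic tensors.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

class AllocatorSketch
{
public:
  explicit AllocatorSketch(size_t size) : _data(size) {}
  uint8_t *base() { return _data.data(); }

private:
  std::vector<uint8_t> _data;
};

class TensorSketch
{
public:
  // Static, non-const tensor: memory is owned elsewhere (e.g. by a memory planner).
  void setBuffer(uint8_t *buffer)
  {
    assert(_buffer == nullptr);
    _buffer = buffer;
  }
  // Dynamic or const tensor: memory is owned through the allocator.
  void setBuffer(const std::shared_ptr<AllocatorSketch> &alloc)
  {
    assert(_buffer == nullptr);
    _allocator = alloc;
    _buffer = alloc->base();
  }
  // Reads never need to consult _allocator anymore.
  uint8_t *buffer() const { return _buffer; }

private:
  uint8_t *_buffer = nullptr;
  std::shared_ptr<AllocatorSketch> _allocator;
};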
void visit(const ir::operation::LogicalNot &op) override;
void visit(const ir::operation::LogicalOr &op) override;
void visit(const ir::operation::Logistic &op) override;
+ void visit(const ir::operation::L2Normalization &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
void visit(const ir::operation::Max &op) override;
void visit(const ir::operation::Min &op) override;
void visit(const ir::operation::Reshape &op) override;
void visit(const ir::operation::Round &op) override;
void visit(const ir::operation::RSQRT &op) override;
+ void visit(const ir::operation::ResizeBilinear &op) override;
void visit(const ir::operation::Reverse &op) override;
void visit(const ir::operation::Select &op) override;
void visit(const ir::operation::Shape &op) override;
void visit(const ir::operation::LogicalNot &op) override;
void visit(const ir::operation::LogicalOr &op) override;
void visit(const ir::operation::Logistic &op) override;
+ void visit(const ir::operation::L2Normalization &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
void visit(const ir::operation::Max &op) override;
void visit(const ir::operation::Min &op) override;
void visit(const ir::operation::Reshape &op) override;
void visit(const ir::operation::Round &op) override;
void visit(const ir::operation::RSQRT &op) override;
+ void visit(const ir::operation::ResizeBilinear &op) override;
void visit(const ir::operation::Reverse &op) override;
void visit(const ir::operation::Select &op) override;
void visit(const ir::operation::Shape &op) override;
/**
* @brief To allocate memory for output tensor if needed
*/
+ // TODO Remove this, as it is no longer used
backend::IDynamicTensorManager *_dynamic_tensor_manager;
/**
* @brief To get tensor object and access tensor-level info, e.g., ITensor::buffer()
size_t operandSize(void) const;
const OperationIndexSet &getUses() const { return _uses; }
- const OperationIndexSet &getDef() const { return _def; }
+ OperationIndex getDef() const { return _def; }
void insertUse(const OperationIndex &idx);
void removeUse(const OperationIndex &idx);
- void insertDef(const OperationIndex &idx);
- void removeDef(const OperationIndex &idx);
+ void setDef(const OperationIndex &idx);
+ void unsetDef();
public:
void type(const DataType type) { _info.type(type); };
std::shared_ptr<Data> _data;
OperationIndexSet _uses;
- OperationIndexSet _def; // size is 0 (constant) or 1 (from def operation)
+ OperationIndex _def;
};
} // namespace ir
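
A brief usage sketch of the single-def API introduced above (assuming onert's ir/Operand.h is available): an operand now records at most one defining operation instead of a set.

#include "ir/Operand.h"

// Hypothetical helper, for illustration only.
void rewireDef(onert::ir::Operand &operand, const onert::ir::OperationIndex &new_def)
{
  if (operand.getDef().valid()) // previously: operand.getDef().size() > 0
    operand.unsetDef();         // previously: operand.removeDef(old_def)
  operand.setDef(new_def);      // previously: operand.insertDef(new_def)
}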
#include "ir/operation/Pack.h"
#include "ir/operation/Select.h"
#include "ir/operation/Split.h"
+#include "ir/operation/SplitV.h"
#include "ir/operation/Unpack.h"
#include "ir/operation/Pad.h"
#include "ir/operation/Min.h"
#include "ir/operation/BatchMatMul.h"
#include "ir/operation/FusedBatchNorm.h"
#include "ir/operation/LogSoftmax.h"
+#include "ir/operation/Quantize.h"
+#include "ir/operation/StatelessRandomUniform.h"
OP(Pack)
OP(Select)
OP(Split)
+OP(SplitV)
OP(Unpack)
OP(Pad)
OP(Custom)
OP(BatchMatMul)
OP(FusedBatchNorm)
OP(LogSoftmax)
+OP(Quantize)
+OP(StatelessRandomUniform)
#define __ONERT_IR_TYPEINFO_H__
#include <cstdint>
+#include <vector>
#include "ir/DataType.h"
TypeInfo() = delete;
explicit TypeInfo(DataType type, float scale = 0, int32_t offset = 0)
- : _type(type), _scale(scale), _offset(offset)
+ : _type(type), _scale(scale), _offset(offset), _sparse(false)
{
}
DataType type() const { return _type; }
float scale() const { return _scale; }
int32_t offset() const { return _offset; }
+ bool sparse() const { return _sparse; }
+ const uint16_t *w1_segments() const { return _w1_segments.data(); }
+ const uint16_t *w1_indices() const { return _w1_indices.data(); }
public:
void type(const DataType type) { _type = type; }
+ void sparse2DMetadata(std::vector<uint16_t> &&w1_segments, std::vector<uint16_t> &&w1_indices)
+ {
+ _sparse = true;
+ _w1_segments = w1_segments;
+ _w1_indices = w1_indices;
+ }
private:
DataType _type;
+ // for quantization
float _scale;
int32_t _offset;
+ // for sparsity
+ bool _sparse;
+ std::vector<uint16_t> _w1_segments;
+ std::vector<uint16_t> _w1_indices;
};
bool operator==(const TypeInfo &lhs, const TypeInfo &rhs);
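
A hedged usage sketch of the sparsity metadata added above (assuming onert's ir/TypeInfo.h; whether w1_segments/w1_indices follow a CSR-style layout is an assumption here, the diff only shows that TypeInfo stores and exposes them):

#include <cassert>
#include <cstdint>
#include <vector>
#include "ir/TypeInfo.h"

void markSparse(onert::ir::TypeInfo &type_info)
{
  // Example CSR-like data for a 3x4 matrix with non-zeros at (0,1), (1,0) and (1,3).
  std::vector<uint16_t> segments{0, 1, 3, 3}; // per-row start offsets into `indices`
  std::vector<uint16_t> indices{1, 0, 3};     // second-dimension index of each non-zero
  type_info.sparse2DMetadata(std::move(segments), std::move(indices));
  assert(type_info.sparse());
}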
enum Input
{
INPUT = 0,
- BLOCK_SIZE = 1
+ BLOCK_SIZE = 1,
+ CROPS_DATA = 2
};
public:
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Softmax; }
+ OpCode opcode() const final { return OpCode::LogSoftmax; }
public:
const Param ¶m() const { return _param; }
{
INPUT = 0,
PAD = 1,
- // VALUE = 2 Not allow padding value operand yet
+ VALUE = 2
};
public:
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_QUANTIZE_H__
+#define __ONERT_IR_OPERATION_QUANTIZE_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Quantize : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ };
+
+public:
+ Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Quantize; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_QUANTIZE_H__
public:
enum Input
{
- INPUT = 0
+ INPUT = 0,
};
struct Param
{
int32_t height_out;
int32_t width_out;
+ bool align_corners;
+ bool half_pixel_centers;
};
public:
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ONERT_IR_OPERATION_SPLIT_V_H__
+#define __ONERT_IR_OPERATION_SPLIT_V_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+class SplitV : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ SIZE_SPLITS = 1,
+ SPLIT_DIM = 2
+ };
+
+ struct Param
+ {
+ int num_splits;
+ };
+
+public:
+ SplitV(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param ¶m);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::SplitV; }
+
+public:
+ const Param ¶m() const { return _param; }
+
+private:
+ Param _param;
+};
+} // namespace operation
+} // namespace ir
+} // namespace onert
+#endif // __ONERT_IR_OPERATION_SPLIT_V_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_STATELESS_RANDOM_UNIFORM_H__
+#define __ONERT_IR_OPERATION_STATELESS_RANDOM_UNIFORM_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class StatelessRandomUniform : public Operation
+{
+public:
+ enum Input
+ {
+ SHAPE = 0,
+ SEED = 1
+ };
+
+public:
+ StatelessRandomUniform(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::StatelessRandomUniform; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_STATELESS_RANDOM_UNIFORM_H__
template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delta_val);
+ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t output_height,
+ const int32_t output_width);
+
ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &input_true_shape,
const ir::Shape &input_false_shape);
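
A sketch of what inferResizeBilinearShape (declared above) is expected to compute for an NHWC input (an assumption for illustration, not the repository implementation): batch and channel dimensions are preserved while H and W come from the operation parameters.

#include <cassert>
#include <cstdint>
#include "ir/Shape.h"

// Assumes onert::ir::Shape exposes Shape(int rank) and a mutable dim(int) accessor.
onert::ir::Shape inferResizeBilinearShapeSketch(const onert::ir::Shape &in_shape,
                                                int32_t output_height, int32_t output_width)
{
  assert(in_shape.rank() == 4); // matches the validator's rank-4 requirement
  onert::ir::Shape out_shape(4);
  out_shape.dim(0) = in_shape.dim(0); // batch
  out_shape.dim(1) = output_height;   // Param::height_out
  out_shape.dim(2) = output_width;    // Param::width_out
  out_shape.dim(3) = in_shape.dim(3); // channels
  return out_shape;
}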
void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
{
// NOTE Handle user tensors first
- auto user_tensor = _user_tensors->getManagedTensor(ind);
+ auto user_tensor = _user_tensors->getNativeTensor(ind);
if (user_tensor)
{
// User tensors cannot be reallocated.
if (buffer_size < new_size)
throw std::runtime_error{"ExecutorBase: output buffer size is less than output tensor size"};
user_tensor->setShape(new_shape);
+ return;
}
- // NOTE Then handle managed tensors
- auto tensor = _tensors->getManagedTensor(ind);
+ // NOTE Then handle native tensors
+ auto tensor = _tensors->getNativeTensor(ind);
assert(tensor);
bool previously_dynamic = tensor->is_dynamic();
const ir::OperandInfo &tensor_info,
ir::Layout backend_layout)
{
- assert(_tensors->getManagedTensor(ind) == nullptr);
- auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout);
- _tensors->setManagedTensor(ind, tensor);
+ assert(_tensors->getNativeTensor(ind) == nullptr);
+ auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this);
+ _tensors->setNativeTensor(ind, tensor);
}
void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
auto &input_set = find->second;
for (auto input_ind : input_set)
{
- if (!_tensors->getManagedTensor(input_ind)->is_dynamic())
+ if (!_tensors->getNativeTensor(input_ind)->is_dynamic())
continue;
_dynamic_mem_mgr->deallocate(input_ind);
void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind)
{
- if (!_tensors->getManagedTensor(output_ind)->is_dynamic())
+ if (!_tensors->getNativeTensor(output_ind)->is_dynamic())
return;
_dynamic_mem_mgr->deallocate(output_ind);
* @todo DynamicMemoryManager is not optimized. Optimized one is needed
*/
std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr;
+  // TODO Refactoring: Merge the two TensorRegistries into one
const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
const std::shared_ptr<UserTensorRegistry> _user_tensors;
std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
for (const auto input_index : node.getInputs())
{
- auto input_alloc = getTensor(input_index);
+ auto input_tensor = getTensor(input_index);
- input_tensors.emplace_back(input_alloc);
+ input_tensors.emplace_back(input_tensor);
}
std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
exec::DynAllocInfoMap outputs_dyn_alloc_info;
for (const auto output_index : node.getOutputs())
{
- auto output_alloc = getTensor(output_index);
+ auto output_tensor = getTensor(output_index);
- output_tensors.emplace_back(output_alloc);
+ output_tensors.emplace_back(output_tensor);
const auto output_tensor_builder = getTensorBuilder(output_index);
if (output_tensor_builder->supportDynamicTensor())
{
auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
- outputs_dyn_alloc_info[output_alloc] = exec::DynAllocInfo{output_index, output_dyn_manager};
+ outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
}
}
std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
for (const auto input_index : node.getInputs())
{
- auto input_alloc = getTensor(input_index);
+ auto input_tensor = getTensor(input_index);
- input_tensors.emplace_back(input_alloc);
+ input_tensors.emplace_back(input_tensor);
}
std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
for (const auto output_index : node.getOutputs())
{
- auto output_alloc = getTensor(output_index);
+ auto output_tensor = getTensor(output_index);
- output_tensors.emplace_back(output_alloc);
+ output_tensors.emplace_back(output_tensor);
const auto output_tensor_builder = getTensorBuilder(output_index);
if (output_tensor_builder->supportDynamicTensor())
{
auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
- outputs_dyn_alloc_info[output_alloc] = exec::DynAllocInfo{output_index, output_dyn_manager};
+ outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
}
}
for (auto tensor_builder : _tensor_builder_set)
{
auto reg = tensor_builder->tensorRegistry();
- auto tensor = reg ? reg->getManagedITensor(index) : tensor_builder->tensorAt(index);
+ auto tensor = reg ? reg->getNativeITensor(index) : tensor_builder->tensorAt(index);
if (tensor)
{
ret = tensor_builder;
std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
{
// NOTE Find from User Tensor Registry first
- // FIXME There may be both user tensor and managed tensor for a `ind` which is a waste
+  // FIXME There may be both a user tensor and a native tensor for an `ind`, which is wasteful
auto user_tensor = _user_tensor_reg->getITensor(ind);
auto tensor = _tensor_reg->getITensor(ind);
if (user_tensor)
std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
{
- return _tensor_reg->getManagedTensor(ind);
+ return _tensor_reg->getNativeTensor(ind);
}
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
void TensorBuilder::setUserTensor(const ir::OperandIndex &ind,
const std::shared_ptr<UserTensor> &tensor)
{
- _user_tensor_reg->setManagedTensor(ind, tensor);
+ _user_tensor_reg->setNativeTensor(ind, tensor);
}
} // namespace controlflow
class UserTensor : public IPortableTensor
{
public:
- UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size)
- : _info{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false}
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size,
+ IDynamicTensorManager *dynamic_tensor_manager)
+ : _info{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false},
+ _dynamic_tensor_manager{dynamic_tensor_manager}
{
}
- UserTensor(const ir::OperandInfo &info, ir::Layout layout) : UserTensor{info, layout, nullptr, 0}
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout,
+ IDynamicTensorManager *dynamic_tensor_manager)
+ : UserTensor{info, layout, nullptr, 0, dynamic_tensor_manager}
{
}
void set_dynamic() override { _dynamic = true; }
ir::Shape getShape() const override { return _info.shape(); }
void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
+ bool is_constant() const override { return false; }
+ IDynamicTensorManager *dynamic_tensor_manager() override { return _dynamic_tensor_manager; }
private:
ir::OperandInfo _info;
uint8_t *_buffer;
size_t _size;
bool _dynamic;
+ IDynamicTensorManager *_dynamic_tensor_manager;
};
} // namespace controlflow
{
VERBOSE_F() << ind << std::endl;
- auto tensor = _tensors->getManagedTensor(ind);
+ auto tensor = _tensors->getNativeTensor(ind);
assert(tensor);
bool previously_dynamic = tensor->is_dynamic();
const ir::OperandInfo &tensor_info,
ir::Layout backend_layout)
{
- assert(_tensors->getManagedTensor(ind) == nullptr);
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout);
- _tensors->setManagedTensor(ind, tensor);
+ assert(_tensors->getNativeTensor(ind) == nullptr);
+ auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, this);
+ _tensors->setNativeTensor(ind, tensor);
}
void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
auto &input_set = find->second;
for (auto input_ind : input_set)
{
- auto *tensor = _tensors->getManagedTensor(input_ind).get();
+ auto *tensor = _tensors->getNativeTensor(input_ind).get();
if (!tensor->is_dynamic())
continue;
void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind)
{
- auto *tensor = _tensors->getManagedTensor(output_ind).get();
+ auto *tensor = _tensors->getNativeTensor(output_ind).get();
if (!tensor->is_dynamic())
return;
#include "backend/cpu_common/StaticTensorManager.h"
+#include "backend/cpu_common/DynamicTensorManager.h"
#include <util/logging.h>
namespace onert
void StaticTensorManager::allocateConsts(void)
{
- for (auto &pair : _tensors->managed_tensors())
+ for (auto &pair : _tensors->native_tensors())
{
const auto &ind = pair.first;
auto tensor = pair.second;
auto mem_alloc = _const_mgr->allocate(ind, tensor->total_size());
tensor->setBuffer(mem_alloc);
auto buffer = mem_alloc->base();
- VERBOSE(CPU_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer)
- << "size : " << tensor->total_size() << std::endl;
+ VERBOSE(CPU_COMMON_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value()
+ << "): " << static_cast<void *>(buffer)
+ << "size : " << tensor->total_size() << std::endl;
}
}
}
{
_nonconst_mgr->allocate();
- for (auto &pair : _tensors->managed_tensors())
+ for (auto &pair : _tensors->native_tensors())
{
const auto &ind = pair.first;
auto tensor = pair.second;
auto *buffer = _nonconst_mgr->getBuffer(ind);
tensor->setBuffer(buffer);
- VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
+ VERBOSE(CPU_COMMON_StaticTensorManager) << "TENSOR(#" << ind.value()
+ << "): " << static_cast<void *>(buffer) << std::endl;
}
}
}
const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
bool as_const)
{
- assert(!_tensors->getManagedTensor(ind));
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout);
- _tensors->setManagedTensor(ind, tensor);
+ assert(!_tensors->getNativeTensor(ind));
+ auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, nullptr);
+ _tensors->setNativeTensor(ind, tensor);
_as_constants[ind] = as_const;
}
void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
{
- assert(_tensors->getManagedTensor(ind));
+ assert(_tensors->getNativeTensor(ind));
// This method is called only when a tensor has proper shape
- assert(!_tensors->getManagedTensor(ind)->is_dynamic());
+ assert(!_tensors->getNativeTensor(ind)->is_dynamic());
if (!_as_constants[ind])
_nonconst_mgr->claimPlan(ind, size);
void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
{
- assert(_tensors->getManagedTensor(ind));
+ assert(_tensors->getNativeTensor(ind));
// This method is called only when a tensor has proper shape
- assert(!_tensors->getManagedTensor(ind)->is_dynamic());
+ assert(!_tensors->getNativeTensor(ind)->is_dynamic());
if (!_as_constants[ind])
_nonconst_mgr->releasePlan(ind);
void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
{
- for (const auto &it : _tensors->managed_tensors())
+ for (const auto &it : _tensors->native_tensors())
fn(it.first);
}
const auto &operand = lowered_graph.graph().operands().at(ind);
auto tensor = std::make_shared<backend::controlflow::UserTensor>(
operand.info(),
- ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */);
+ ir::Layout::NHWC, /* FIXME find op_seq for this operand and use frontend_layout */
+ cf_tensor_builder->dynamicTensorManager());
// Add tensor to controlflow TensorRegistry.
cf_tensor_builder->setUserTensor(ind, tensor);
ret.push_back(tensor);
-
- // Set other tensors as external tensors
- for (auto &tensor_builder : tensor_builders)
- {
- // FIXME This is a workaround registering all user tensors to all backends
- // FIXME Handle when it is failed
- tensor_builder->setExternalTensor(ind, tensor);
- }
}
return ret;
}
+void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph,
+ TensorBuilders &tensor_builders)
+{
+ lowered_graph.op_seqs().iterate(
+ [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
+ auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
+ auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend());
+ for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED)
+ {
+        // If an OpSequence input/output tensor does not have its own tensor object,
+        // it must be using an external tensor, so find the tensor in the other tensor builders
+        // and register it with this tensor builder if it is portable
+ if (!backend_ctx->tensor_builder->tensorAt(ind))
+ {
+ auto tensor = tensor_builders.getITensor(ind);
+ assert(tensor); // The tensor must have been created in one of TensorBuilders
+ auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor);
+ if (ptensor)
+ backend_ctx->tensor_builder->setMigrantTensor(ind, ptensor);
+ }
+ }
+ });
+}
+
exec::IExecutor *
ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
tensor_builder->prepare();
}
+ prepareExternalTensors(*lowered_graph, tensor_builders);
+
ExecutionBuilder builder;
// Generate kernels
tensor_builder->prepare();
}
+ prepareExternalTensors(*lowered_graph, tensor_builders);
+
ExecutionBuilder builder;
// Generate kernels
#include "backend/ITensor.h"
#include "exec/IExecutor.h"
#include "ir/LoweredGraph.h"
+#include "TensorBuilders.h"
namespace onert
{
static std::vector<std::shared_ptr<backend::ITensor>>
initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
const ir::OperandIndexSequence &indices);
+ static void prepareExternalTensors(ir::LoweredGraph &lowered_graph,
+ TensorBuilders &tensor_builders);
static exec::IExecutor *
createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
// manipulate output of operation and op_seq
// - replace output of the last operation's output to new operand
- // with old operand's removeDef and new operand's appendDef()
+  //   with the old operand's unsetDef() and the new operand's setDef()
manipulateOutput(op_seq_ind, op_seq_output_ind, new_op_ind);
// new op
last_node.replaceOutputs(op_seq_output_ind, new_op_ind);
// op_seq_obj doesn't have uses/def
- output_obj.removeDef(last_node_ind);
- new_op_obj.insertDef(last_node_ind);
+ assert(output_obj.getDef() == last_node_ind);
+ output_obj.unsetDef();
+ new_op_obj.setDef(last_node_ind);
}
ir::OperationIndex
const auto new_node_ind = operations.push(std::move(new_node));
input_obj.insertUse(new_node_ind);
- new_op_obj.insertDef(new_node_ind);
+ new_op_obj.setDef(new_node_ind);
return new_node_ind;
}
const auto new_node_ind = operations.push(std::move(new_node));
new_op_obj.insertUse(new_node_ind);
- output_obj.insertDef(new_node_ind);
+ output_obj.setDef(new_node_ind);
return new_node_ind;
}
for (auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
- obj.removeDef(first_node_ind);
+ assert(obj.getDef() == first_node_ind);
+ obj.unsetDef();
VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Def(Node#"
<< first_node_ind.value() << ") is removed" << std::endl;
}
continue;
// This operand is output of operation, not weight or bias
- if (operand.getDef().size() > 0)
+ if (operand.getDef().valid())
++prev_op_cnt;
// Current node has multiple inputs as concat or at the beginning of the separated branch
const auto &input_operand = _graph->operands().at(input_operand_idx);
const bool quant = input_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM;
- for (const auto &input_node_idx : input_operand.getDef())
+ auto input_node_idx = input_operand.getDef();
+ if (input_node_idx.valid())
{
// Data transfer cost from parent's node backend to current node's backend:
auto parent_backend = _backend_resolver->getBackend(input_node_idx);
* @param[in] backend_resolver backend resolver
*/
HEScheduler(const backend::BackendContexts &backend_contexts, const CompilerOptions &options)
- : _backend_contexts{backend_contexts}, _is_supported{}, _backends_avail_time{}, _ops_eft{},
+ : _is_supported{}, _backends_avail_time{}, _ops_eft{},
_op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
_is_profiling_mode{options.he_profiling_mode},
_is_linear_exec{options.executor == "Linear"},
_is_parallel_exec{options.executor == "Parallel"}
{
- // Workaround to avoid unused-private-field warning
- // TODO use _backend_contexts and remove workaround
- (void)_backend_contexts;
-
for (auto &entry : backend_contexts)
{
_all_backends.push_back(entry.first);
// whether it should assign these backends to these nodes:
// * It stores false for unsupported nodes
// * During rank calculation with enabled profiling mode it stores true for supported nodes
- const backend::BackendContexts &_backend_contexts;
std::unordered_map<const backend::Backend *, std::unordered_map<std::string, bool>> _is_supported;
// Finishing and starting time of each backend
std::unordered_map<const backend::Backend *, std::map<int64_t, int64_t>> _backends_avail_time;
std::unique_ptr<compiler::BackendResolver> _backend_resolver;
std::unique_ptr<exec::ExecTime> _exec_time;
const ir::Graph *_graph{nullptr};
- std::vector<const backend::Backend *>
- _all_backends; // TODO Remove this and use _backend_contexts instead
+ std::vector<const backend::Backend *> _all_backends;
const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to controlflow_backend
bool _is_profiling_mode;
bool _is_linear_exec;
}
uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().size(); // should be 1 or 0
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
bool is_const = obj.isConstant();
if (is_const)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_OPERAND_CONTEXT_H__
-#define __ONERT_COMPILER_OPERAND_CONTEXT_H__
-
-#include "backend/ITensor.h"
-#include "ir/OperandIndexMap.h"
-#include <unordered_map>
-#include <memory>
-
-namespace onert
-{
-namespace compiler
-{
-
-class OperandContext
-{
-public:
- OperandContext &set(const ir::OperandIndex &ind, const std::shared_ptr<backend::ITensor> &tensor);
-
-public:
- bool exist(const ir::OperandIndex &ind) const { return _tensors.find(ind) != _tensors.end(); }
-
-public:
- std::shared_ptr<backend::ITensor> at(const ir::OperandIndex &ind) const
- {
- return _tensors.at(ind);
- }
-
- std::shared_ptr<backend::ITensor> &at(const ir::OperandIndex &ind) { return _tensors.at(ind); }
-
- void iterate(const std::function<void(const ir::OperandIndex &, backend::ITensor &)> &fn);
-
-private:
- ir::OperandIndexMap<std::shared_ptr<backend::ITensor>> _tensors;
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_OPERAND_CONTEXT_H__
{
}
+void OperationValidator::checkUnaryOp(const ir::Operation &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ // Check if I/O types match
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ // Check if I/O shapes match
+ OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
void OperationValidator::operator()()
{
// There is no reason for each subgraph to have subgraphs since compiler has subgraphs when
[&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
}
-void OperationValidator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::Abs &node) { checkUnaryOp(node); }
void OperationValidator::visit(const ir::operation::AvgPool2D &node)
{
num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
}
-void OperationValidator::visit(const ir::operation::Round &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Round::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::Round &node) { checkUnaryOp(node); }
void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
{
}
}
-void OperationValidator::visit(const ir::operation::Exp &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::Exp &node) { checkUnaryOp(node); }
void OperationValidator::visit(const ir::operation::ExpandDims &node)
{
OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
}
-void OperationValidator::visit(const ir::operation::Floor &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::Floor &node) { checkUnaryOp(node); }
void OperationValidator::visit(const ir::operation::HashtableLookup &node)
{
}
}
+void OperationValidator::visit(const ir::operation::L2Normalization &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
+
+ auto ifm_shape = _ctx.at(ifm_index).shape();
+ auto ofm_shape = _ctx.at(ofm_index).shape();
+
+ OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank());
+
+ for (auto i = 0; i < ifm_shape.rank(); i++)
+ {
+ OP_REQUIRES(ifm_shape.dim(i) == ofm_shape.dim(i));
+ }
+}
+
void OperationValidator::visit(const ir::operation::Unpack &node)
{
const auto num{node.param().num};
OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
}
-void OperationValidator::visit(const ir::operation::Cos &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
+void OperationValidator::visit(const ir::operation::Cos &node) { checkUnaryOp(node); }
- const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
-void OperationValidator::visit(const ir::operation::Sin &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
+void OperationValidator::visit(const ir::operation::Sin &node) { checkUnaryOp(node); }
- const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::RSQRT &node) { checkUnaryOp(node); }
-void OperationValidator::visit(const ir::operation::RSQRT &node)
+void OperationValidator::visit(const ir::operation::Shape &node)
{
const auto output_index{node.getOutputs().at(0)};
if (_ctx.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ UNUSED_RELEASE(input_index);
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1);
}
-void OperationValidator::visit(const ir::operation::Shape &node)
+void OperationValidator::visit(const ir::operation::ResizeBilinear &node)
{
const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+
if (_ctx.at(output_index).info().isDynamic())
+ {
return;
+ }
+ OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
- const auto input_index{node.getInputs().at(0)};
- UNUSED_RELEASE(input_index);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1);
+ auto align_corners = node.param().align_corners;
+ auto half_pixel_centers = node.param().half_pixel_centers;
+
+ OP_REQUIRES(!align_corners || !half_pixel_centers);
}
void OperationValidator::visit(const ir::operation::Reverse &node)
// TODO Add to validate with subgraphs
}
-void OperationValidator::visit(const ir::operation::Neg &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
+void OperationValidator::visit(const ir::operation::Neg &node) { checkUnaryOp(node); }
- const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::Log &node) { checkUnaryOp(node); }
-void OperationValidator::visit(const ir::operation::Log &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
-void OperationValidator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::LogicalNot &node) { checkUnaryOp(node); }
void OperationValidator::visit(const ir::operation::SquaredDifference &node)
{
OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
}
+
+void OperationValidator::visit(const ir::operation::Quantize &node)
+{
+  VERBOSE(Quantize) << "Validate Quantize operation" << std::endl;
+
+ OP_REQUIRES(node.getInputs().size() == 1);
+ OP_REQUIRES(node.getOutputs().size() == 1);
+
+ const auto input_index{node.getInputs().at(0)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+}
} // namespace compiler
} // namespace onert
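
For context, a minimal sketch (a general illustration, not taken from this diff) of the float32 to QUANT_UINT8_ASYMM relationship the validator above assumes, using the usual asymmetric affine scheme:

#include <algorithm>
#include <cmath>
#include <cstdint>

// q = clamp(round(x / scale) + zero_point, 0, 255)
uint8_t quantizeAsymmU8(float x, float scale, int32_t zero_point)
{
  const int32_t q = static_cast<int32_t>(std::round(x / scale)) + zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}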
void visit(const ir::operation::DepthToSpace &node) override;
void visit(const ir::operation::Pack &node) override;
void visit(const ir::operation::LSTM &node) override;
+ void visit(const ir::operation::L2Normalization &node) override;
void visit(const ir::operation::Unpack &node) override;
void visit(const ir::operation::Pad &node) override;
void visit(const ir::operation::Min &node) override;
void visit(const ir::operation::Sin &node) override;
void visit(const ir::operation::RSQRT &node) override;
void visit(const ir::operation::Shape &node) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &node) override;
void visit(const ir::operation::If &node) override;
void visit(const ir::operation::While &node) override;
void visit(const ir::operation::Range &node) override;
void visit(const ir::operation::MatrixBandPart &node) override;
void visit(const ir::operation::LogSoftmax &node) override;
+ void visit(const ir::operation::Quantize &node) override;
private:
- void checkReduceOp(const ir::OperandIndex input_index, const ir::OperandIndex output_index);
+ void checkUnaryOp(const ir::Operation &node);
private:
// TODO Remove _ctx field
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::Input::INPUT));
}
+void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
+}
+
void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
}
}
+void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+ const auto &input = _operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = _operands.at(output_idx);
+
+ // if input is dynamic, output also becomes dynamic
+ if (input.info().isDynamic())
+ {
+ output.info().setDynamic();
+ _return_has_dynamic_tensor = true;
+ return;
+ }
+
+  // Shape inference logic based on the operation's Params
+ ir::Shape new_shape = shape_inference::inferResizeBilinearShape(
+ input.shape(), op.param().height_out, op.param().width_out);
+
+  // Update the output shape only when the inferred shape differs from the current one
+ if (new_shape != output.shape())
+ {
+ // change on output shape
+ output.info().shape(new_shape);
+ }
+}
+
void StaticShapeInferer::visit(const ir::operation::Reverse &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT));
#include "backend/Backend.h"
#include "backend/controlflow/Config.h"
#include "backend/controlflow/TensorBuilder.h"
+#include "util/logging.h"
namespace onert
{
return _cf_tensor_builder;
}
+ std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind)
+ {
+ for (auto &tensor_builder : _tensor_builders)
+ {
+ auto tensor = tensor_builder->tensorAt(ind);
+ if (tensor)
+ return tensor;
+ }
+ return nullptr;
+ }
+
private:
std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders;
std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder;
#include "exec/DynamicShapeInference.h"
#include "util/ShapeInference.h"
+#include <assert.h>
namespace onert
{
namespace exec
{
+inline backend::IDynamicTensorManager *
+dynamicTensorManagerOf(const std::shared_ptr<backend::ITensor> &tensor)
+{
+ if (!tensor->dynamic_tensor_manager())
+ throw std::runtime_error{"Dynamic Tensor Manager is not available for this tensor."};
+ return tensor->dynamic_tensor_manager();
+}
+
void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const ir::OperandIndex lhs_idx,
const ir::OperandIndex rhs_idx)
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
- _dynamic_tensor_manager->applyShape(output_idx, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape);
assert(output->buffer() != nullptr);
}
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis, rank);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
// TODO
auto new_shape = shape_inference::inferBatchMatMulShape(lhs_shape, rhs_shape, op.param());
- _dynamic_tensor_manager->applyShape(output_index, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
}
void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
// set output shape and output buffer
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
auto output = _tensor_registry->getITensor(output_ind);
auto output_shape = shape_inference::inferConcatShape(in_shapes, op.param());
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
}
void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
ir::Shape output_shape = shape_inference::inferConv2DShape(input_shape, ker_shape, op.param());
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]);
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
auto output_shape = shape_inference::inferFillShape(input_shape, input_buf);
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::INPUT));
}
+void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
+}
+
void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
const auto axis_val = op.param().axis;
ir::Shape new_shape = shape_inference::inferOnehotShape(indices_shape, *depth_buf, axis_val);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
ir::Shape new_shape = shape_inference::inferPackShape(input_shape, axis, rank, num);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
// change output shape and reallocate output tensor memory
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
*reinterpret_cast<int32_t *>(limit_tensor->buffer()),
*reinterpret_cast<int32_t *>(delta_tensor->buffer()));
}
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
ir::Shape new_shape = shape_inference::inferReduceShape(input_shape, axes_vec, keep_dims);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
}
assert(output->buffer() != nullptr);
}
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
}
assert(output->buffer() != nullptr);
}
}
}
+void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op)
+{
+ // check if output is not dynamic
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+  auto input_ind = op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT);
+ auto input = _tensor_registry->getITensor(input_ind);
+
+ if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ return;
+
+ // getting output shape from input shape and Params
+ auto output_shape = shape_inference::inferResizeBilinearShape(
+ input->getShape(), op.param().height_out, op.param().width_out);
+
+ // if shape is changed, change output shape and reallocate output tensor memory
+ if (output_shape != output->getShape() || output->buffer() == nullptr)
+ {
+ // change on output shape
+    dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ }
+ assert(output->buffer() != nullptr);
+}
+
void DynamicShapeInferer::visit(const ir::operation::Reverse &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT));
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
ir::Shape output_shape;
output_shape.append(input_shape.rank());
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
ir::Shape new_shape = shape_inference::inferSliceShape(input_shape, begins_buf, sizes_buf);
- _dynamic_tensor_manager->applyShape(output_index, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
assert(output->buffer() != nullptr);
}
ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
- _dynamic_tensor_manager->applyShape(output_idx, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape);
assert(output->buffer() != nullptr);
}
auto output_ind = op.getOutputs().at(out_tensor_idx);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
}
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
ir::Shape output_shape =
onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
- _dynamic_tensor_manager->applyShape(output_index, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_index, output_shape);
assert(output->buffer() != nullptr);
}
auto output_shape = shape_inference::inferTileShape(input_shape, multiplier_buffer);
// set output shape and output buffer
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
// set output shape, based on input and params
ir::Shape new_shape = shape_inference::inferTransposeShape(input_shape, perm);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
auto output_ind = op.getOutputs().at(out_tensor_idx);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
{
auto tensor_registry = tensor_builder->tensorRegistry();
assert(tensor_registry);
- tensor = tensor_registry->getManagedITensor(ind);
+ tensor = tensor_registry->getNativeITensor(ind);
if (tensor != nullptr)
{
if (tensor_builder->supportDynamicTensor())
{
auto tensor_registry = tensor_builder->tensorRegistry();
assert(tensor_registry);
- tensor = tensor_registry->getManagedITensor(ind);
+ tensor = tensor_registry->getNativeITensor(ind);
if (tensor != nullptr)
{
if (tensor_builder->supportDynamicTensor())
#include "ir/LowerInfoMap.h"
#include "backend/IConfig.h"
#include "backend/Backend.h"
-#include "compiler/OperandContext.h"
#include "exec/ExecTime.h"
#include "exec/IFunction.h"
#include "backend/IDynamicTensorManager.h"
int32_t data_offset() const override { return _info.typeInfo().offset(); }
const ir::OperandInfo &tensorInfo() const override { return _info; }
uint64_t num_elements() const override { return _info.shape().num_elements(); };
+ backend::IDynamicTensorManager *dynamic_tensor_manager() override { return nullptr; }
private:
const ir::OperandInfo _info;
const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer);
float *output_ptr = reinterpret_cast<float *>(output_buffer);
- nnfw::cker::Pad(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape, output_ptr,
- nullptr);
+ nnfw::cker::Pad<float>(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape,
+ output_ptr, nullptr);
}
void invokePad(const ExecEnv *env, const ir::Operation &node)
auto outputs = node.getOutputs();
for (auto output : outputs)
{
- operands().at(output).insertDef(index);
+ operands().at(output).setDef(index);
}
for (auto input : node.getInputs() | ir::Remove::UNDEFINED)
#include "pass/ConstantLoweringPass.h"
#include "pass/PermutationOperationPass.h"
#include "pass/PermutationInsertionPass.h"
+#include "pass/PermutationEliminationPass.h"
#include "ir/GraphIterator.h"
#include "verifier/Verifier.h"
#include "backend/Backend.h"
pass::PermutationInsertionPass pi_pass(*this);
pi_pass.run();
- // Implemented code no longer works.
- // pass::PermutationEliminationPass pe_pass(*this);
- // pe_pass.run();
+
+ pass::PermutationEliminationPass pe_pass(*this);
+ pe_pass.run();
_op_seqs.dump("merged and sorted operations with permutation", _graph.operations());
}
const auto lower_info = getLowerInfo(index);
const auto &shape = object.shape();
- std::string def_ops = operation_index_to_string(object.getDef());
+ std::string def_ops =
+ object.getDef().valid() ? std::to_string(object.getDef().value()) : "N/A";
std::string use_ops = operation_index_to_string(object.getUses());
std::string def_layouts = factors_to_string(lower_info->def_factors());
std::string use_layouts = factors_to_string(lower_info->use_factors());
for (const auto &input : op_seq.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
const auto &input_obj = _graph.operands().at(input);
- for (const auto &def : input_obj.getDef())
+ auto def = input_obj.getDef();
+ if (def.valid())
{
branched_set.insert(def);
if (branched_set.size() > 1)
// Check for branching down
for (const auto &output : node.getOutputs() | Remove::DUPLICATED)
{
+ // TODO Fix this workaround for the case of model outputs that are used by another operation
+      // This is needed since branching is decided per operation, but for model outputs
+      // there is a controlflow backend (a use backend) while no actual use operation exists
+ if (_graph.getOutputs().contains(output))
+ return false;
+
const auto &output_obj = _graph.operands().at(output);
for (const auto &use : output_obj.getUses())
{
void Operand::removeUse(const OperationIndex &idx) { _uses.remove(idx); }
-void Operand::insertDef(const OperationIndex &idx)
-{
- assert(!isConstant());
- assert(_def.size() == 0);
-
- _def.insert(idx);
-}
+void Operand::setDef(const OperationIndex &idx) { _def = idx; }
-void Operand::removeDef(const OperationIndex &idx)
-{
- assert(_def.contains(idx));
-
- _def.remove(idx);
-}
+void Operand::unsetDef() { _def = OperationIndex{}; }
} // namespace ir
} // namespace onert
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
+void OperationDumper::visit(const StatelessRandomUniform &node)
+{
+ VERBOSE(LIR) << "* StatelessRandomUniform" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE)
+ << ", " << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
+
void OperationDumper::visit(const Squeeze &node)
{
VERBOSE(LIR) << "* Squeeze" << std::endl;
void visit(const operation::Squeeze &) override;
void visit(const operation::Slice &) override;
void visit(const operation::StridedSlice &) override;
+ void visit(const operation::StatelessRandomUniform &) override;
void visit(const operation::Sub &) override;
void visit(const operation::Tanh &) override;
void visit(const operation::Tile &) override;
BatchToSpaceND::BatchToSpaceND(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Quantize.h"
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Quantize::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Quantize::Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ir/operation/SplitV.h"
+#include <cassert>
+#include "ir/OperationVisitor.h"
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+void SplitV::accept(OperationVisitor &v) const { v.visit(*this); }
+SplitV::SplitV(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param ¶m)
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+{
+}
+} // namespace operation
+} // namespace ir
+} // namespace onert
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#include "OperandContext.h"
+#include "ir/operation/StatelessRandomUniform.h"
#include <cassert>
+#include "ir/OperationVisitor.h"
+
namespace onert
{
-namespace compiler
+namespace ir
{
-
-OperandContext &OperandContext::set(const ir::OperandIndex &id,
- const std::shared_ptr<backend::ITensor> &tensor)
+namespace operation
{
- // Only one tensor for an id
- assert(_tensors.find(id) == _tensors.end());
- _tensors[id] = tensor;
- return (*this);
-}
+void StatelessRandomUniform::accept(OperationVisitor &v) const { v.visit(*this); }
-void OperandContext::iterate(
- const std::function<void(const ir::OperandIndex &, backend::ITensor &)> &fn)
+StatelessRandomUniform::StatelessRandomUniform(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
- for (auto &e : _tensors)
- {
- fn(e.first, *e.second);
- }
}
-} // namespace compiler
+} // namespace operation
+} // namespace ir
} // namespace onert
if (_replace_operands_map.count(key) == 0)
{
auto new_object = object;
+ new_object.unsetDef();
 // TODO Remove const_cast
- const_cast<OperationIndexSet &>(new_object.getDef()).clear();
const_cast<OperationIndexSet &>(new_object.getUses()).clear();
const auto new_index = _graph.operands().emplace(new_object);
_replace_operands_map[key] = new_index;
// Remove this node from uses of origin operand
// Constant operand has no def.
- assert(object.getDef().size() == 0);
+ assert(!object.getDef().valid());
object.removeUse(node_index);
// Remove origin operand
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*/
#include "PermutationEliminationPass.h"
+#include "backend/controlflow/Config.h"
-#include "ir/Operand.h"
-#include "ir/operand/LowerInfo.h"
-#include "ir/Graph.h"
-#include "backend/IConfig.h"
#include "util/logging.h"
namespace onert
{
namespace pass
{
-void PermutationEliminationPass::callback(const OperandIndex &inp_index, Operand &object)
-{
- if (_graph.getInputs().contains(inp_index))
- {
- eliminateInput(inp_index, object);
- }
- else if (_graph.getOutputs().contains(inp_index))
- {
- eliminateOutput(inp_index, object);
- }
-}
-void PermutationEliminationPass::eliminateInput(const OperandIndex &inp_index, Operand &object)
+void PermutationEliminationPass::callback(const OperationIndex &ind, Operation &node)
{
- auto &model_inputs = _graph.getInputs();
-
- // get uses of the model's given input
- auto uses = object.getUses();
+ _op_ind = ind;
+ node.accept(*this);
+};
- // input must be used just by permutation
- if (uses.size() != 1)
- {
- return;
- }
+void PermutationEliminationPass::visit(const operation::Permute &node)
+{
+ auto in_operand = node.getInputs().at(0);
+ auto out_operand = node.getOutputs().at(0);
- for (auto input_use : uses)
+ // Check if two tensors are both portable
+ // TODO Make this general, this is just a workaround to check two tensors are portable
{
- auto &perm_operation = _graph.operations().at(input_use);
- auto perm_inputs = perm_operation.getInputs();
+ auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement();
+ auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement();
- auto perm_outputs = perm_operation.getOutputs();
+ auto in_backend_id = in_def_factor.backend()->config()->id();
+ auto out_backend_id = out_def_factor.backend()->config()->id();
- if (!isPermuteLayerToEliminate(perm_inputs, perm_outputs, true))
- {
+ // TODO Fix this workaround that removes only Permute between cpu and controlflow backend.
+ // This should be general.
+ if (!((in_backend_id == backend::controlflow::Config::ID && out_backend_id == "cpu") ||
+ (in_backend_id == "cpu" && out_backend_id == backend::controlflow::Config::ID)))
return;
- }
-
- assert(perm_inputs.at(0) == inp_index);
-
- VERBOSE(PermutationEliminationPass::EliminateInput) << "remove NHWC_TO_NCHW permutation\n";
-
- // set model's new input, which was output of permutation
- model_inputs.replace(inp_index, perm_outputs.at(0));
-
- // remove model's input, which is also input of permutation
- _graph.removeOperand(inp_index);
-
- // remove permutation operation
- assert(_lowered_graph.op_seqs().containsOperation(input_use));
- auto op_seq_idx = _lowered_graph.op_seqs().getOperation(input_use);
- _lowered_graph.op_seqs().remove(op_seq_idx);
- _graph.operations().remove(input_use);
-
- VERBOSE(PermutationEliminationPass::EliminateInput)
- << inp_index.value() << " is model's input and is removed. New input is "
- << perm_outputs.at(0).value() << "\n"
- << input_use.value() << " is removed permutation operation\n";
- }
-}
-
-void PermutationEliminationPass::eliminateOutput(const OperandIndex &out_index, Operand &object)
-{
- auto &model_outputs = _graph.getOutputs();
-
- // get defs of the model's given output
- auto defs = object.getDef();
-
- // output must use just permutation
- if (defs.size() != 1)
- {
- return;
}
- for (auto output_def : defs)
+ if (_graph.getOutputs().contains(out_operand))
{
- auto &perm_operation = _graph.operations().at(output_def);
- auto perm_outputs = perm_operation.getOutputs();
-
- auto perm_inputs = perm_operation.getInputs();
- if (!isPermuteLayerToEliminate(perm_inputs, perm_outputs, false))
+ // Exceptional case : When the output operand is a model output
+ // In this case we keep the output and remove the input
+
+ auto &out_operand_obj = _graph.operands().at(out_operand);
+ assert(out_operand_obj.getDef() == _op_ind);
+ out_operand_obj.unsetDef();
+ _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
+ if (!op_seq.getOutputs().contains(in_operand))
+ return;
+
+ // Update OpSequence/Operation edges and Operand edges
+ op_seq.replaceOutputs(in_operand, out_operand);
+ for (auto op : op_seq.operations())
+ {
+ auto &operation_obj = _graph.operations().at(op);
+ if (operation_obj.getOutputs().contains(in_operand))
+ {
+ operation_obj.replaceOutputs(in_operand, out_operand);
+ out_operand_obj.setDef(op);
+ }
+ }
+ });
+
+ // Remove Permute operation, enclosing OpSequence and the operand
{
- return;
- }
-
- assert(perm_outputs.at(0) == out_index);
+ _graph.removeOperand(in_operand);
- VERBOSE(PermutationEliminationPass::EliminateOutput) << "remove NCHW_TO_NHWC permutation\n";
-
- // Update operations' output that is used by permute operand
- for (auto perm_input_index : perm_inputs)
- {
- auto &perm_input_operand = _graph.operands().at(perm_input_index);
- perm_input_operand.removeUse(output_def);
+ auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
+ // Assumes the enclosing OpSequence contains just this Permute operation
+ assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
+ _lowered_graph.op_seqs().remove(op_seq_ind);
+ _graph.operations().remove(_op_ind);
}
- // set model's new output, which was input of permutation
- model_outputs.replace(out_index, perm_inputs.at(0));
-
- // remove model's output, which is also output of permutation
- _graph.removeOperand(out_index);
-
- // remove permutation operation
- assert(_lowered_graph.op_seqs().containsOperation(output_def));
- auto op_seq_idx = _lowered_graph.op_seqs().getOperation(output_def);
- _lowered_graph.op_seqs().remove(op_seq_idx);
- _graph.operations().remove(output_def);
-
- VERBOSE(PermutationEliminationPass::EliminateOutput)
- << out_index.value() << " is model's output and is removed. New output is "
- << perm_inputs.at(0).value() << "\n"
- << output_def.value() << " is removed permutation operation\n";
+ _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
+ if (!op_seq.getInputs().contains(in_operand))
+ return;
+
+ op_seq.replaceInputs(in_operand, out_operand);
+ for (auto op : op_seq.operations())
+ {
+ auto &operation_obj = _graph.operations().at(op);
+ if (operation_obj.getInputs().contains(in_operand))
+ {
+ operation_obj.replaceInputs(in_operand, out_operand);
+ out_operand_obj.insertUse(op);
+ }
+ }
+ });
+
+ VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
+ VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output (kept) Operand : " << out_operand << std::endl;
}
-}
-
-bool PermutationEliminationPass::isPermuteLayerToEliminate(const OperandIndexSequence &inp_indexes,
- const OperandIndexSequence &out_indexes,
- bool is_for_model_input)
-{
- auto input_def_factors = _lowered_graph.getLowerInfo(inp_indexes.at(0))->def_factors();
- auto output_def_factors = _lowered_graph.getLowerInfo(out_indexes.at(0))->def_factors();
-
- auto input_layout = input_def_factors.getOnlyElement().layout();
- auto output_layout = output_def_factors.getOnlyElement().layout();
-
- if (input_def_factors.size() != 1 || output_def_factors.size() != 1)
- {
- return false;
- }
-
- // all operands' factor must be the same
- for (auto index : inp_indexes)
- {
- auto op_factor_set = _lowered_graph.getLowerInfo(index)->def_factors();
- if (op_factor_set.size() != 1 ||
- input_layout != _lowered_graph.getLowerInfo(index)->def_factors().getOnlyElement().layout())
- {
- return false;
- }
- }
- // all operands' factor must be the same
- for (auto index : out_indexes)
+ else
{
- auto op_factor_set = _lowered_graph.getLowerInfo(index)->def_factors();
- if (op_factor_set.size() != 1 ||
- output_layout !=
- _lowered_graph.getLowerInfo(index)->def_factors().getOnlyElement().layout())
+ // Otherwise keep the input and remove the output
+
+ auto &in_operand_obj = _graph.operands().at(in_operand);
+ in_operand_obj.removeUse(_op_ind);
+
+ // Make OpSequences(that use the output) use the input
+ _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
+ if (!op_seq.getInputs().contains(out_operand))
+ return;
+
+ op_seq.replaceInputs(out_operand, in_operand);
+ for (auto op : op_seq.operations())
+ {
+ auto &operation_obj = _graph.operations().at(op);
+ if (operation_obj.getInputs().contains(out_operand))
+ {
+ operation_obj.replaceInputs(out_operand, in_operand);
+ in_operand_obj.insertUse(op);
+ }
+ }
+ });
+
+ // Remove Permute operation, enclosing OpSequence and the operand
{
- return false;
+ _graph.removeOperand(out_operand);
+
+ auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
+ // Assumes the enclosing OpSequence contains just this Permute operation
+ assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
+ _lowered_graph.op_seqs().remove(op_seq_ind);
+ _graph.operations().remove(_op_ind);
}
- }
- if (is_for_model_input)
- {
- // check if this is NHWC_TO_NCHW permutation: must have single input, which is model's input
- return (inp_indexes.size() == 1 && input_layout == Layout::NHWC &&
- output_layout == Layout::NCHW);
+ VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
+ VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output (removed) Operand : " << out_operand << std::endl;
}
-
- // check if this is NCHW_TO_NHWC permutation: must have single output, which is model's output
- return (out_indexes.size() == 1 && input_layout == Layout::NCHW && output_layout == Layout::NHWC);
}
} // namespace pass
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#ifndef __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
#define __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
-#include "LoweredOperandPass.h"
-#include "ir/Operand.h"
-#include "ir/OperandIndexSequence.h"
+#include "ir/OperationVisitor.h"
+#include "LoweredOperationPass.h"
namespace onert
{
namespace pass
{
-class PermutationEliminationPass : public LoweredOperandPass
+/**
+ * @brief An optimization pass that removes Permute operations if possible
+ *
+ * There may be some Permute operations that are inserted by PermutationInsertionPass or other
+ * passes. This pass checks all Permute operations and eliminates them if Permute in/out tensors
+ * are compatible and layouts match.
+ *
+ * The Permute input tensor is kept and the output tensor is removed, except when the output is
+ * a model output. Model output tensors must be on the controlflow backend, so in that case the
+ * output is kept and the input is removed instead.
+ *
+ * @note This is an optimization pass, which means that everything should still work fine even
+ * if this pass is skipped.
+ */
+class PermutationEliminationPass : public LoweredOperationPass, public OperationVisitor
{
public:
- using LoweredOperandPass::LoweredOperandPass;
+ using LoweredOperationPass::LoweredOperationPass;
public:
- std::string id() override { return "PermutationEliminationPass"; }
+ std::string id() final { return "PermutationEliminationPass"; }
- void callback(const OperandIndex &index, Operand &object) override;
+public:
+ void callback(const OperationIndex &i, Operation &n) final;
private:
- /**
- * @brief Remove Permute operation that permutates input
- *
- * Note: This function aslo removes model's input and
- * sets output of permutation as model's new input
- *
- * @param inp_index is the target operand index for the elimination
- * @param object is the target operand object for the elimination
- *
- * @return
- */
- void eliminateInput(const OperandIndex &inp_index, Operand &object);
-
- /**
- * @brief Remove Permute operation that permutates output of a model
- *
- * Note: This function aslo removes model's output and
- * sets input of permutation as model's new output
- *
- * @param out_index is the target operand index for the elimination
- * @param object is the target operand object for the elimination
- *
- * @return
- */
- void eliminateOutput(const OperandIndex &out_index, Operand &object);
+ void visit(const operation::Permute &) final;
- /**
- * @brief Determine if passed operands are permute layer's input and output, that must be
- * eliminated
- *
- * @param inp_index indexes of the input operand to operation
- * @param out_index indexes of the output operand to operation
- * @param is_for_model_input checking for model's input or output
- *
- * @return if it is permutation layer
- */
- bool isPermuteLayerToEliminate(const OperandIndexSequence &inp_indexes,
- const OperandIndexSequence &out_indexes, bool is_for_model_input);
+private:
+ ir::OperationIndex _op_ind;
};
} // namespace pass
}
auto insert_set = operand_li->use_factors() - operand_li->def_factors();
- auto def_factor = operand_li->def_factors().getOnlyElement();
-
- auto compatible_backends = [](auto /* backend1 */, auto /* backend2 */) {
- // TODO If other issues for Permute elimination are resolved, enable this
- return false;
- /*
- // TODO This is a workaround for not inserting Permute between cpu and controlflow.
- // To be general, we need another way of checking they are compatible.
- const auto cf = backend::controlflow::Config::ID;
- const auto cpu = "cpu";
- const auto id1 = backend1->config()->id();
- const auto id2 = backend2->config()->id();
- return (id1 == cpu && id2 == cf) // Allows no-Permute for Model inputs
- || (id1 == cf && id2 == cpu); // Allows no-Permute for Model outputs
- */
- };
-
for (auto factor : insert_set)
{
- if (factor.layout() == def_factor.layout() &&
- compatible_backends(factor.backend(), def_factor.backend()))
- {
- // For this factor we can just reuse existing operand - Permute is not added.
- VERBOSE(PermutationInsertionPass) << "Permutation Insertion is skipped for operand "
- << index << " / as the tensor is compatible with backend "
- << factor.backend()->config()->id() << std::endl;
- factor_to_index.emplace(factor, index);
- continue;
- }
-
const auto permute_operation_index = insertPermute(index, factor);
permute_indexes.push_back(permute_operation_index);
const auto &permute_operation = _graph.operations().at(permute_operation_index);
// Update Use/Def info
{
_graph.operands().at(operand_index).insertUse(node_index);
- _graph.operands().at(out_operand_index).insertDef(node_index);
+ _graph.operands().at(out_operand_index).setDef(node_index);
}
return node_index;
}
std::string id() override { return "PermutationInsertionPass"; }
void callback(const OperandIndex &index, Operand &object) override;
+private:
/**
* @brief Insert Permute operation that has given operand as input
*
*/
OperationIndex insertPermute(const OperandIndex &operand_index,
const operand::PermuteFactor &factor);
-
-private:
};
} // namespace pass
const auto &output_ind = node.getOutputs().at(0);
const auto &output = _graph.operands().at(output_ind);
- assert(output.getDef().size() == 1);
- const auto &node_index = *output.getDef().begin();
+ assert(output.getDef().valid());
+ const auto node_index = output.getDef();
const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout();
const auto &output_ind = node.getOutputs().at(0);
const auto &output_obj = _graph.operands().at(output_ind);
- assert(output_obj.getDef().size() == 1);
- const auto &node_index = *output_obj.getDef().begin();
+ assert(output_obj.getDef().valid());
+ const auto node_index = output_obj.getDef();
const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
lower_info->addUsePermuteFactor(new_factor);
 // Check whether the node's input is a model input or a constant
- if (_graph.operands().at(input).getDef().size() == 0 &&
+ if (!_graph.operands().at(input).getDef().valid() &&
(lower_info->def_factors().size() == 1 &&
lower_info->def_factors().getOnlyElement() == removed_factor))
{
// DAGChecker
//
-bool DAGChecker::verify(const Graph &graph) const
+bool DAGChecker::verify(const Graph &graph) const noexcept
{
auto &operations = graph.operations();
bool cyclic = false;
// EdgeConsistencyVerifier
//
-bool EdgeConsistencyChecker::verify(const Graph &graph) const
+bool EdgeConsistencyChecker::verify(const Graph &graph) const noexcept
{
auto &operations = graph.operations();
- uint32_t mismatches = 0;
+ uint32_t errors = 0;
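+ // Count every broken edge (and missing operand) so a single verification pass reports all inconsistencies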
operations.iterate([&](const OperationIndex &index, const Operation &node) {
for (auto operand_index : node.getInputs() | ir::Remove::UNDEFINED)
{
- auto &operand = graph.operands().at(operand_index);
- mismatches += (operand.getUses().contains(index) ? 0 : 1);
+ try
+ {
+ auto &operand = graph.operands().at(operand_index);
+ bool operand_has_use = operand.getUses().contains(index);
+ if (!operand_has_use)
+ {
+ VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand "
+ << operand_index << " to Operation " << index
+ << std::endl;
+ errors += 1;
+ }
+ }
+ catch (const std::out_of_range &e)
+ {
+ VERBOSE(EdgeConsistencyChecker)
+ << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
+ << operand_index << ", but the operand object is not present in the graph" << std::endl;
+ errors += 1;
+ }
}
for (auto operand_index : node.getOutputs())
{
- auto &operand = graph.operands().at(operand_index);
- mismatches += (operand.getDef().contains(index) ? 0 : 1);
+ try
+ {
+ auto &operand = graph.operands().at(operand_index);
+ if (operand.getDef() != index)
+ {
+ VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand "
+ << operand_index << " to Operation " << index
+ << std::endl;
+ errors += 1;
+ }
+ }
+ catch (const std::out_of_range &e)
+ {
+ VERBOSE(EdgeConsistencyChecker)
+ << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
+ << operand_index << ", but the operand object is not present in the graph" << std::endl;
+ errors += 1;
+ }
}
});
- return mismatches == 0;
+
+ VERBOSE(EdgeConsistencyChecker) << "Total Number of errors : " << errors << std::endl;
+
+ return errors == 0;
}
} // namespace verifier
struct IVerifier
{
virtual ~IVerifier() = default;
- virtual bool verify(const Graph &graph) const = 0;
+ virtual bool verify(const Graph &graph) const noexcept = 0;
};
} // namespace verifier
class DAGChecker : public IVerifier
{
public:
- bool verify(const Graph &graph) const override;
+ bool verify(const Graph &graph) const noexcept override;
};
class EdgeConsistencyChecker : public IVerifier
{
public:
- bool verify(const Graph &graph) const override;
+ bool verify(const Graph &graph) const noexcept override;
};
} // namespace verifier
std::string _ts;
};
-void emit_rusage(EventRecorder *rec, const std::string &ts)
+#ifdef DEBUG
+inline void emit_rusage(EventRecorder *rec, const std::string &ts)
{
struct rusage ru;
rec->emit(evt);
}
}
+#endif
} // namespace
break;
}
- // Trace resource usage per each event notification
+// TODO: Add resource measurement (e.g. RSS)
+// when it can be done with low overhead in release builds
+#ifdef DEBUG
emit_rusage(_rec, ts);
+#endif
}
private:
std::mutex _mu;
- WriteFormat _write_format{WriteFormat::CHROME_TRACING};
+ // TODO: Allow user to control write_format
+ WriteFormat _write_format{WriteFormat::SNPE_BENCHMARK};
std::vector<DurationEvent> _duration_events;
std::vector<CounterEvent> _counter_events;
};
return ret;
}
+ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t output_height,
+ const int32_t output_width)
+{
+ assert(in_shape.rank() == 4);
+ ir::Shape ret(in_shape.rank());
+
+ ret.dim(0) = in_shape.dim(0);
+ ret.dim(1) = output_height;
+ ret.dim(2) = output_width;
+ ret.dim(3) = in_shape.dim(3);
+
+ return ret;
+}
+
template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delta_val)
{
ir::Shape out_shape(static_cast<int>(1));
using SubGraph = typename LoaderDomain::SubGraph;
using Tensor = typename LoaderDomain::Tensor;
using TensorType = typename LoaderDomain::TensorType;
+ using DimensionType = typename LoaderDomain::DimensionType;
+ using SparseIndexVector = typename LoaderDomain::SparseIndexVector;
protected:
bool isOptionalInputTensor(std::int32_t idx) { return idx == -1; }
* @param file_path
*/
void loadFromFile(const char *file_path);
+ /**
+ * @brief Load a model from a buffer
+ *
+ * @param buffer buffer pointer
+ * @param size buffer size
+ */
+ void loadFromBuffer(uint8_t *buffer, size_t size);
protected:
~BaseLoader() = default;
void loadSoftmax(const Operator *op, ir::Graph &subg);
void loadMaxPool2D(const Operator *op, ir::Graph &subg);
void loadConcatenation(const Operator *op, ir::Graph &subg);
- void loadInstanceNorm(const Operator *op, ir::Graph &subg);
void loadFill(const Operator *op, ir::Graph &subg);
void loadFC(const Operator *op, ir::Graph &subg);
void loadAdd(const Operator *op, ir::Graph &subg);
void loadSqueeze(const Operator *op, ir::Graph &subg);
void loadPrelu(const Operator *op, ir::Graph &subg);
void loadSplit(const Operator *op, ir::Graph &subg);
+ void loadSplitV(const Operator *op, ir::Graph &subg);
void loadSlice(const Operator *op, ir::Graph &subg);
void loadStridedSlice(const Operator *op, ir::Graph &subg);
void loadUnpack(const Operator *op, ir::Graph &subg);
void loadTile(const Operator *op, ir::Graph &subg);
void loadLogicalOr(const Operator *op, ir::Graph &subg);
void loadRange(const Operator *op, ir::Graph &subg);
- void loadBCQFullyConnected(const Operator *op, ir::Graph &subg);
- void loadBCQGather(const Operator *op, ir::Graph &subg);
void loadMatrixBandPart(const Operator *op, ir::Graph &subg);
void loadBroadcastTo(const Operator *op, ir::Graph &subg);
void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
void loadLogSoftmax(const Operator *op, ir::Graph &subg);
+ void loadQuantize(const Operator *op, ir::Graph &subg);
+ void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
+ void loadStatelessRandomUniform(const Operator *op, ir::Graph &subg);
protected:
// Base address for mapped region for loading (if needed)
_verifier = std::make_unique<Verifier>(reinterpret_cast<const std::uint8_t *>(_base), size);
loadModel();
- munmap(_base, size);
close(_fd);
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::loadFromBuffer(uint8_t *buffer,
+ size_t size)
+{
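+ // Note: when loading from a buffer, constant operand data is wrapped as ExternalData
+ // (see loadOperand), so the buffer is expected to outlive the loaded model.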
+ _base = buffer;
+ _verifier = std::make_unique<Verifier>(reinterpret_cast<const std::uint8_t *>(_base), size);
+ loadModel();
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
ir::Activation BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::convertActivation(
const ActivationFunctionType type)
{
}
}
+/* Copied from tensorflow lite. Need to append copyright */
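+// Copies the values of a flatbuffers index vector (Int32Vector/Uint16Vector/Uint8Vector)
+// into `arr` as uint16_t; returns false if the source vector has no values.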
+template <typename T> bool Copy(const T *data_ptr, std::vector<uint16_t> &arr)
+{
+ if (data_ptr->values() == nullptr)
+ {
+ return false;
+ }
+
+ int size = data_ptr->values()->size();
+ arr.reserve(size);
+ for (int i = 0; i < size; i++)
+ {
+ arr.emplace_back(static_cast<uint16_t>(data_ptr->values()->Get(i)));
+ }
+ return true;
+}
+
template <typename LoaderDomain, typename SpecificLoader>
ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Tensor *tensor,
ir::Graph &subg)
}
// Create TypeInfo
ir::TypeInfo type_info(data_type, scale, zero_point);
+ // Sparsity
+ auto src_sparsity = tensor->sparsity();
+ if (src_sparsity != nullptr)
+ {
+ std::vector<uint16_t> w1_segments;
+ std::vector<uint16_t> w1_indices;
+ // ignore traversal_order, block_map
+ // load metadata
+ const size_t dim_metadata_size = src_sparsity->dim_metadata()->size();
+ if (dim_metadata_size != 2)
+ throw std::runtime_error("sparse tensor is supported only for 2D");
+ const auto *src_metadata = src_sparsity->dim_metadata()->Get(0);
+ if (src_metadata->format() != DimensionType::DimensionType_DENSE)
+ throw std::runtime_error("sparse tensor dim[0] is not DENSE");
+ src_metadata = src_sparsity->dim_metadata()->Get(1);
+ if (src_metadata->format() != DimensionType::DimensionType_SPARSE_CSR)
+ throw std::runtime_error("sparse tensor dim[1] is not SPARSE_CSR");
+
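+ // Read the CSR segment/index arrays into 16-bit vectors, whichever integer width the flatbuffer uses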
+ auto ParseSparseIndexVector = [src_metadata, &w1_segments, &w1_indices]() {
+ if (src_metadata->array_segments() == nullptr || src_metadata->array_indices() == nullptr)
+ return false;
+ bool status = true;
+ switch (src_metadata->array_segments_type())
+ {
+ case SparseIndexVector::SparseIndexVector_Int32Vector:
+ status = Copy(src_metadata->array_segments_as_Int32Vector(), w1_segments);
+ break;
+ case SparseIndexVector::SparseIndexVector_Uint16Vector:
+ status = Copy(src_metadata->array_segments_as_Uint16Vector(), w1_segments);
+ break;
+ case SparseIndexVector::SparseIndexVector_Uint8Vector:
+ status = Copy(src_metadata->array_segments_as_Uint8Vector(), w1_segments);
+ break;
+ default:
+ return false;
+ }
+ if (status != true)
+ return false;
+ switch (src_metadata->array_indices_type())
+ {
+ case SparseIndexVector::SparseIndexVector_Int32Vector:
+ return Copy(src_metadata->array_indices_as_Int32Vector(), w1_indices);
+ case SparseIndexVector::SparseIndexVector_Uint16Vector:
+ return Copy(src_metadata->array_indices_as_Uint16Vector(), w1_indices);
+ case SparseIndexVector::SparseIndexVector_Uint8Vector:
+ return Copy(src_metadata->array_indices_as_Uint8Vector(), w1_indices);
+ default:
+ break;
+ }
+ return false;
+ };
+ if (ParseSparseIndexVector() == false)
+ throw std::runtime_error("Error during parsing sparsity index information");
+ type_info.sparse2DMetadata(std::move(w1_segments), std::move(w1_indices));
+ }
// Create operand
const auto operand_index = subg.addOperand(shape, type_info);
if (data != nullptr)
{
using std::ptrdiff_t;
- size_t data_size = data->size();
- ptrdiff_t unaligned_offset_start = data->data() - _base;
- ptrdiff_t offset_end = unaligned_offset_start + data_size;
-
- // Calculated aligned offset from base address of mapped region
- // munmap accepts memory address which is a multiple of the pagesize
- ptrdiff_t aligned_offset_start = (unaligned_offset_start / _pagesize) * _pagesize;
- size_t mmap_size = offset_end - aligned_offset_start;
-
- auto ptr = std::make_unique<ir::MMapedData>(_fd, aligned_offset_start, mmap_size,
- unaligned_offset_start, data_size);
- subg.setOperandValue(operand_index, std::move(ptr));
+ std::unique_ptr<ir::Data> data_obj;
+ if (_fd == -1) // Model is from memory
+ {
+ data_obj = std::make_unique<ir::ExternalData>(data->data(), data->size());
+ }
+ else // Model is loaded(mmap'd) from a file
+ {
+ data_obj = std::make_unique<ir::CachedData>(data->data(), data->size());
+ deallocateMmappedArea(const_cast<uint8_t *>(data->data()), data->size());
+ }
+ subg.setOperandValue(operand_index, std::move(data_obj));
}
// Name unused
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadInstanceNorm(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::InstanceNorm::Param param;
- const auto *options = op->builtin_options_as_InstanceNormOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
- // Use default value 1e-5 if value of epsilon is zero
- param.epsilon = options->epsilon() == 0.f ? 1e-5 : options->epsilon();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::InstanceNorm(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadFill(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::operation::ResizeBilinear::Param param;
param.height_out = size_v[0];
param.width_out = size_v[1];
+ param.align_corners = op->builtin_options_as_ResizeBilinearOptions()->align_corners();
+ param.half_pixel_centers = op->builtin_options_as_ResizeBilinearOptions()->half_pixel_centers();
std::unique_ptr<ir::Operation> new_op(new ir::operation::ResizeBilinear({input}, outputs, param));
subg.addOperation(std::move(new_op));
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto block_shape = inputs.at(1);
- auto crops = inputs.at(2);
-
- if (!subg.operands().at(crops).isConstant())
- throw std::runtime_error("BatchToSpaceND: non-constant 'crops' is not supported.");
- std::vector<std::int32_t> crops_v = subg.operands().at(crops).template asVector<std::int32_t>();
- assert(crops_v.size() == 4);
- if (crops_v != std::vector<std::int32_t>{0, 0, 0, 0})
- throw std::runtime_error("BatchToSpaceND: 'crops' other than {0, 0, 0, 0} is not supported.");
-
- std::unique_ptr<ir::Operation> new_op{
- new ir::operation::BatchToSpaceND{{input, block_shape}, outputs}};
+ std::unique_ptr<ir::Operation> new_op{new ir::operation::BatchToSpaceND{inputs, outputs}};
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBCQGather(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadMatrixBandPart(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::BCQGather::Param param;
- const auto *options = op->builtin_options_as_BCQGatherOptions();
- param.input_hidden_size = options->input_hidden_size();
- param.axis = options->axis();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::BCQGather(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::MatrixBandPart(inputs, outputs));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBCQFullyConnected(const Operator *op,
- ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadBroadcastTo(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::BCQFullyConnected::Param param;
- const auto *options = op->builtin_options_as_BCQFullyConnectedOptions();
- param.weights_hidden_size = options->weights_hidden_size();
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::BCQFullyConnected(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::BroadcastTo(inputs, outputs));
subg.addOperation(std::move(new_op));
}
-
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMatrixBandPart(const Operator *op,
- ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSpaceToDepth(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
+ ir::operation::SpaceToDepth::Param param;
+
+ const auto *options = op->builtin_options_as_SpaceToDepthOptions();
+
+ param.block_size = options->block_size();
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::MatrixBandPart(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::SpaceToDepth(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBroadcastTo(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadStatelessRandomUniform(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
-
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::BroadcastTo(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::StatelessRandomUniform(inputs, outputs));
subg.addOperation(std::move(new_op));
}
BatchMatMul,
Einsum,
BroadcastTo,
- FusedBatchNorm
+ FusedBatchNorm,
+ StatelessRandomUniform
};
// Mapping from custom op name string to BuiltinOP enum
{"Einsum", BuiltinOP::Einsum},
{"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
{"BroadcastTo", BuiltinOP::BroadcastTo},
+ {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
};
try
case BuiltinOP::FusedBatchNorm:
loadFusedBatchNorm(op, subg);
break;
+ case BuiltinOP::StatelessRandomUniform:
+ loadStatelessRandomUniform(op, subg);
+ break;
default:
throw std::runtime_error{
"Loader: Custom OP map is defined but operation loader function is not defined"};
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSplitV(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::SplitV::Param param{};
+
+ const auto *options = op->builtin_options_as_SplitVOptions();
+ param.num_splits = options->num_splits();
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::SplitV(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadSlice(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadQuantize(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::Quantize(inputs, outputs));
+ subg.addOperation(std::move(new_op));
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, ir::Graph &subg)
{
const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
case BuiltinOperator::BuiltinOperator_SPLIT:
loadSplit(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_SPLIT_V:
+ loadSplitV(op, subg);
+ return;
case BuiltinOperator::BuiltinOperator_SLICE:
loadSlice(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOG_SOFTMAX:
loadLogSoftmax(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_QUANTIZE:
+ loadQuantize(op, subg);
+ return;
+ case BuiltinOperator::BuiltinOperator_SPACE_TO_DEPTH:
+ loadSpaceToDepth(op, subg);
+ return;
default:
throw std::runtime_error(
std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
target_link_libraries(circle_loader PUBLIC onert_core)
target_link_libraries(circle_loader PRIVATE base_loader nnfw_common nnfw_coverage)
+target_link_libraries(circle_loader PRIVATE circle_schema)
install(TARGETS circle_loader DESTINATION lib)
namespace circle_loader
{
std::unique_ptr<ir::Subgraphs> loadModel(const char *filename);
+std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size);
} // namespace circle_loader
} // namespace onert
using Tensor = circle::Tensor;
using TensorType = circle::TensorType;
using SubGraph = circle::SubGraph;
+ using DimensionType = circle::DimensionType;
+ using SparseIndexVector = circle::SparseIndexVector;
static const char *EnumNameBuiltinOperator(BuiltinOperator e)
{
class CircleLoader final : public base_loader::BaseLoader<LoaderDomain, CircleLoader>
{
+protected:
+ void loadInstanceNorm(const Operator *op, ir::Graph &subg);
+ void loadBCQFullyConnected(const Operator *op, ir::Graph &subg);
+ void loadBCQGather(const Operator *op, ir::Graph &subg);
+
public:
using BaseLoader::BaseLoader;
}
};
+void CircleLoader::loadInstanceNorm(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::InstanceNorm::Param param;
+ const auto *options = op->builtin_options_as_InstanceNormOptions();
+
+ param.activation = convertActivation(options->fused_activation_function());
+ // Use default value 1e-5 if value of epsilon is zero
+ param.epsilon = options->epsilon() == 0.f ? 1e-5 : options->epsilon();
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::InstanceNorm(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
+
+void CircleLoader::loadBCQGather(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::BCQGather::Param param;
+ const auto *options = op->builtin_options_as_BCQGatherOptions();
+ param.input_hidden_size = options->input_hidden_size();
+ param.axis = options->axis();
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::BCQGather(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
+
+void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::BCQFullyConnected::Param param;
+ const auto *options = op->builtin_options_as_BCQFullyConnectedOptions();
+ param.weights_hidden_size = options->weights_hidden_size();
+ param.activation = convertActivation(options->fused_activation_function());
+
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::BCQFullyConnected(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
+
} // namespace
std::unique_ptr<ir::Subgraphs> loadModel(const char *filename)
return subgraphs;
}
+std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size)
+{
+ auto subgraphs = std::make_unique<ir::Subgraphs>();
+ CircleLoader loader(subgraphs);
+ loader.loadFromBuffer(buffer, size);
+ return subgraphs;
+}
+
} // namespace circle_loader
} // namespace onert
--- /dev/null
+add_library(circle_schema INTERFACE)
+
+nnfw_find_package(FlatBuffers REQUIRED)
+
+target_link_libraries(circle_schema INTERFACE flatbuffers::flatbuffers)
+
+target_include_directories(circle_schema INTERFACE include)
}
const ANeuralNetworksOperationTypeEx FIRST_OPERATION = ANEURALNETWORKS_CAST_EX;
- const ANeuralNetworksOperationTypeEx LAST_OPERATION = ANEURALNETWORKS_ADDV2_EX;
+ const ANeuralNetworksOperationTypeEx LAST_OPERATION = ANEURALNETWORKS_SPLIT_V_EX;
if ((type < FIRST_OPERATION) || (type > LAST_OPERATION))
{
VERBOSE(NNAPI::Model) << "addOperation: Invalid operation type" << std::endl;
};
}
-} // namespace
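+// A generator function for unary ops with no params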
+template <typename T>
+Operation *CreateSimpleUnaryOp(const OperationFactory::Param &init_param, Operands &)
+{
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
-OperationFactory &OperationFactory::get()
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new T{inputs, outputs};
+}
+
+// A generator function for binary ops with no params
+template <typename T>
+Operation *createSimpleBinaryOp(const OperationFactory::Param &init_param, Operands &)
{
- static OperationFactory factory;
- return factory;
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ return new T{inputs, outputs};
}
-OperationFactory::OperationFactory()
+// A generator function for Pool2D ops with implicit or explicit padding
+template <typename T>
+Operation *createPool2DOp(const OperationFactory::Param &init_param, Operands &operands)
{
- _map[ANEURALNETWORKS_BATCH_TO_SPACE_ND] = [](const OperationFactory::Param &init_param,
- Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
+ assert(init_param.input_count == 7 || init_param.input_count == 10);
+ assert(init_param.output_count == 1);
- OperandIndexSequence outputs{init_param.outputs[0]};
+ // In common
+ // 0 -> IFM Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+ typename T::Param param;
+ if (init_param.input_count == 7) // support implicit padding
+ {
// Each input should be interpreted as follows:
//
- // 0 -> Input Tensor Index
- // 1 -> Block size Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+ // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
- return new operation::BatchToSpaceND{inputs, outputs};
- };
+ const auto padding_index = OperandIndex{init_param.inputs[1]};
+ const auto hstride_index = OperandIndex{init_param.inputs[2]};
+ const auto vstride_index = OperandIndex{init_param.inputs[3]};
+ const auto kw_index = OperandIndex{init_param.inputs[4]};
+ const auto kh_index = OperandIndex{init_param.inputs[5]};
+ const auto activation_index = OperandIndex{init_param.inputs[6]};
+
+ param.padding.type =
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = operands.at(kh_index).asScalar<uint32_t>();
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else // support explicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+ // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+
+ const auto padding_left_index = OperandIndex{init_param.inputs[1]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[2]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
+ const auto hstride_index = OperandIndex{init_param.inputs[5]};
+ const auto vstride_index = OperandIndex{init_param.inputs[6]};
+ const auto kw_index = OperandIndex{init_param.inputs[7]};
+ const auto kh_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = getUint32Scalar(operands, kh_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ return new T{inputs, outputs, param};
+}
+
+} // namespace
+
+OperationFactory &OperationFactory::get()
+{
+ static OperationFactory factory;
+ return factory;
+}
+
+OperationFactory::OperationFactory()
+{
+ // Each input should be interpreted as follows:
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ _map[ANEURALNETWORKS_BATCH_TO_SPACE_ND] = createSimpleBinaryOp<operation::BatchToSpaceND>;
_map[ANEURALNETWORKS_DEPTHWISE_CONV_2D] = [](const OperationFactory::Param &init_param,
Operands &operands) {
return new operation::DepthwiseConv2D{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_MAX_POOL_2D] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 7 || init_param.input_count == 10);
- assert(init_param.output_count == 1);
+ _map[ANEURALNETWORKS_MAX_POOL_2D] = createPool2DOp<operation::MaxPool2D>;
- // In common
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::MaxPool2D::Param param;
- if (init_param.input_count == 7) // support implicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
-
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = operands.at(kh_index).asScalar<uint32_t>();
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else if (init_param.input_count == 10) // support explicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
-
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- return new operation::MaxPool2D{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- // TODO We may reuse code here for MAX_POOL_2D. Seems like these two are identical
- assert(init_param.input_count == 7 || init_param.input_count == 10);
- assert(init_param.output_count == 1);
-
- // In common
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::AvgPool2D::Param param;
- if (init_param.input_count == 7) // support implicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
-
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else if (init_param.input_count == 10) // support explicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
-
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
-
- return new operation::AvgPool2D{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = createPool2DOp<operation::AvgPool2D>;
_map[ANEURALNETWORKS_CONCATENATION] = [](const OperationFactory::Param &init_param,
Operands &operands) {
return new operation::Squeeze{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_TANH] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Tanh{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_LOG] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Log{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_LOGISTIC] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
+ _map[ANEURALNETWORKS_TANH] = CreateSimpleUnaryOp<operation::Tanh>;
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
+ _map[ANEURALNETWORKS_LOG] = CreateSimpleUnaryOp<operation::Log>;
- return new operation::Logistic{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_LOGISTIC] = CreateSimpleUnaryOp<operation::Logistic>;
_map[ANEURALNETWORKS_DIV] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
return new operation::Div{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_EXP] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Exp{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_EXP] = CreateSimpleUnaryOp<operation::Exp>;
// ANEURALNETWORKS_EXP_EX is deprecated
// TODO Remove ANEURALNETWORKS_EXP_EX
_map[ANEURALNETWORKS_EXP_EX] = _map[ANEURALNETWORKS_EXP];
- _map[ANEURALNETWORKS_EXPAND_DIMS] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Axis Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::ExpandDims{inputs, outputs};
- };
+ // Each input should be interpreted as follows:
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ _map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>;
_map[ANEURALNETWORKS_GREATER] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
return new operation::Comparison{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LOGICAL_AND] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::LogicalAnd{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_LOGICAL_AND] = createSimpleBinaryOp<operation::LogicalAnd>;
// ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
return new operation::LogicalAnd{inputs, outputs};
};
- _map[ANEURALNETWORKS_RSQRT] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::RSQRT{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_RSQRT] = CreateSimpleUnaryOp<operation::RSQRT>;
_map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
// TODO Remove ANEURALNETWORKS_RSQRT_EX
_map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
- _map[ANEURALNETWORKS_RELU] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ReLU{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_RELU] = CreateSimpleUnaryOp<operation::ReLU>;
_map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param,
Operands &operands) {
operation::ResizeBilinear::Param param;
param.height_out = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<int32_t>();
param.width_out = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>();
-
+ param.align_corners = false;
+ param.half_pixel_centers = false;
return new operation::ResizeBilinear{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_RELU1] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ReLU1{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_RELU6] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
+ _map[ANEURALNETWORKS_RELU1] = CreateSimpleUnaryOp<operation::ReLU1>;
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ReLU6{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_RELU6] = CreateSimpleUnaryOp<operation::ReLU6>;
_map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
return new operation::SpaceToDepth{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_L2_POOL_2D] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 10 || init_param.input_count == 7);
- assert(init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- operation::L2Pool2D::Param param;
-
- if (init_param.input_count == 7) // Imlicit Padding case
- {
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else // Explicit Padding case
- {
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
-
- return new operation::L2Pool2D{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_L2_POOL_2D] = createPool2DOp<operation::L2Pool2D>;
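Likewise, createPool2DOp is assumed to be a shared generator defined elsewhere in this patch. A condensed sketch, reconstructed from the removed L2Pool2D lambda above (the template form and names are assumptions):

template <typename T>
Operation *createPool2DOp(const OperationFactory::Param &init_param, Operands &operands)
{
  assert(init_param.input_count == 7 || init_param.input_count == 10);
  assert(init_param.output_count == 1);

  // 0 -> IFM Tensor Index
  OperandIndexSequence inputs{init_param.inputs[0]};
  OperandIndexSequence outputs{init_param.outputs[0]};

  typename T::Param param;
  if (init_param.input_count == 7) // Implicit padding: 1 -> padding code, 2-3 -> strides, 4-5 -> filter size, 6 -> activation
  {
    param.padding.type =
        NNAPIConvert::getPaddingType(operands.at(OperandIndex{init_param.inputs[1]}).asScalar<PaddingCode>());
    param.stride = makeStride(operands, OperandIndex{init_param.inputs[2]}, OperandIndex{init_param.inputs[3]});
    param.kw = getUint32Scalar(operands, OperandIndex{init_param.inputs[4]});
    param.kh = getUint32Scalar(operands, OperandIndex{init_param.inputs[5]});
    param.activation =
        NNAPIConvert::getFusedActivation(operands.at(OperandIndex{init_param.inputs[6]}).asScalar<FuseCode>());
  }
  else // Explicit padding: 1-4 -> left/right/top/bottom, 5-6 -> strides, 7-8 -> filter size, 9 -> activation
  {
    param.padding.type = PaddingType::EXPLICIT;
    param.padding.param = makeExplicitPadding(operands, OperandIndex{init_param.inputs[1]},
                                               OperandIndex{init_param.inputs[2]},
                                               OperandIndex{init_param.inputs[3]},
                                               OperandIndex{init_param.inputs[4]});
    param.stride = makeStride(operands, OperandIndex{init_param.inputs[5]}, OperandIndex{init_param.inputs[6]});
    param.kw = getUint32Scalar(operands, OperandIndex{init_param.inputs[7]});
    param.kh = getUint32Scalar(operands, OperandIndex{init_param.inputs[8]});
    param.activation =
        NNAPIConvert::getFusedActivation(operands.at(OperandIndex{init_param.inputs[9]}).asScalar<FuseCode>());
  }

  return new T{inputs, outputs, param};
}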
_map[ANEURALNETWORKS_EMBEDDING_LOOKUP] = [](const OperationFactory::Param &init_param,
Operands &) {
return new operation::LogicalOr{inputs, outputs};
};
- _map[ANEURALNETWORKS_LOGICAL_NOT] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::LogicalNot{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_LOGICAL_NOT] = CreateSimpleUnaryOp<operation::LogicalNot>;
// ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
// TODO Remove ANEURALNETWORKS_GATHER_EX
_map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER];
- _map[ANEURALNETWORKS_NEG] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Neg{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_NEG] = CreateSimpleUnaryOp<operation::Neg>;
// ANEURALNETWORKS_NEG_EX is deprecated
// TODO Remove ANEURALNETWORKS_NEG_EX
_map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG];
- _map[ANEURALNETWORKS_ABS] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Abs{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_ABS] = CreateSimpleUnaryOp<operation::Abs>;
// ANEURALNETWORKS_ABS_EX is deprecated
// TODO Remove ANEURALNETWORKS_ABS_EX
// TODO Remove ANEURALNETWORKS_ARGMAX_EX
_map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
- _map[ANEURALNETWORKS_DEQUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Dequantize{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_DEQUANTIZE] = CreateSimpleUnaryOp<operation::Dequantize>;
_map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
return new operation::Split{inputs, outputs, param};
};
+ _map[ANEURALNETWORKS_SPLIT_V_EX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 4);
+    assert(init_param.output_count >= 1); // At least one output tensor
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
+ OperandIndexSequence outputs;
+ for (uint32_t n = 0; n < init_param.output_count; ++n)
+ {
+ outputs.append(OperandIndex{init_param.outputs[n]});
+ }
+
+ operation::SplitV::Param param;
+ param.num_splits = operands.at(OperandIndex{init_param.inputs[3]}).asScalar<std::int32_t>();
+ return new operation::SplitV{inputs, outputs, param};
+ };
+
// ANEURALNETWORKS_SPLIT_EX is deprecated
// TODO Remove ANEURALNETWORKS_SPLIT_EX
_map[ANEURALNETWORKS_SPLIT_EX] = _map[ANEURALNETWORKS_SPLIT];
};
_map[ANEURALNETWORKS_PAD] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count >= 1);
+ assert(init_param.input_count >= 2 && init_param.input_count <= 3 &&
+ init_param.output_count >= 1);
OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ if (init_param.input_count == 3)
+ {
+ inputs.append(OperandIndex{init_param.inputs[2]});
+ }
OperandIndexSequence outputs{init_param.outputs[0]};
return new operation::Pad{inputs, outputs};
};
- _map[ANEURALNETWORKS_MINIMUM] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
+ _map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD];
- return new operation::Min{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_MAXIMUM] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
+ _map[ANEURALNETWORKS_MINIMUM] = createSimpleBinaryOp<operation::Min>;
- return new operation::Max{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_MAXIMUM] = createSimpleBinaryOp<operation::Max>;
_map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
return new operation::Range{inputs, outputs};
};
- _map[ANEURALNETWORKS_POW] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> LHS Tensor Index
- // 1 -> RHS Tensor Index
+ // Each input should be interpreted as follows:
+ // 0 -> LHS Tensor Index
+ // 1 -> RHS Tensor Index
+ _map[ANEURALNETWORKS_POW] = createSimpleBinaryOp<operation::Pow>;
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::Pow{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_FILL_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> A tensor, specifying the input.
- // 1 -> A 1-D tensor, specifying the value
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Fill{inputs, outputs};
- };
+ // Each input should be interpreted as follows:
+ // 0 -> A tensor, specifying the input.
+ // 1 -> A 1-D tensor, specifying the value
+ _map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>;
_map[ANEURALNETWORKS_ZEROS_LIKE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 1 && init_param.output_count == 1);
return new operation::ZerosLike{inputs, outputs};
};
- _map[ANEURALNETWORKS_TILE] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Multiple Tensor Index
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::Tile{inputs, outputs};
- };
+ // Each input should be interpreted as follows:
+ // 0 -> Input Tensor Index
+ // 1 -> Multiple Tensor Index
+ _map[ANEURALNETWORKS_TILE] = createSimpleBinaryOp<operation::Tile>;
_map[ANEURALNETWORKS_MATRIX_BAND_PART_EX] = [](const OperationFactory::Param &init_param,
Operands &) {
return new operation::Einsum{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_BROADCAST_TO_EX] = [](const OperationFactory::Param &init_param,
- Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
+ // 0 -> Input Tensor Index
+  // 1 -> A 1-D int tensor Index (int32 or int64)
+ _map[ANEURALNETWORKS_BROADCAST_TO_EX] = createSimpleBinaryOp<operation::BroadcastTo>;
+ _map[ANEURALNETWORKS_STATELESS_RANDOM_UNIFORM_EX] = [](const OperationFactory::Param &init_param,
+ Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
OperandIndexSequence outputs{init_param.outputs[0]};
// Each input should be interpreted as follows:
//
- // 0 -> Input Tensor Index
+ // 0 -> Shape Tensor Index
// 1 -> int32, int64, An 1-D int tensor Index
OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- return new operation::BroadcastTo{inputs, outputs};
+ return new operation::StatelessRandomUniform{inputs, outputs};
};
_map[ANEURALNETWORKS_FUSED_BATCH_NORM_V3_EX] = [](const OperationFactory::Param &init_param,
return new operation::LogSoftmax{inputs, outputs, param};
};
+
+ _map[ANEURALNETWORKS_QUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ return new operation::Quantize{inputs, outputs};
+ };
}
Operation *OperationFactory::create(ANeuralNetworksOperationType type,
using Tensor = onert_tflite::Tensor;
using TensorType = onert_tflite::TensorType;
using SubGraph = onert_tflite::SubGraph;
+ using DimensionType = onert_tflite::DimensionType;
+ using SparseIndexVector = onert_tflite::SparseIndexVector;
static const char *EnumNameBuiltinOperator(BuiltinOperator e)
{
list(APPEND MINIMAL_SRCS "src/minimal.cc")
-add_executable(minimal ${MINIMAL_SRCS})
-target_link_libraries(minimal nnfw-dev pthread dl)
+add_executable(onert-minimal-app ${MINIMAL_SRCS})
+target_link_libraries(onert-minimal-app nnfw-dev pthread dl)
-install(TARGETS minimal DESTINATION bin)
+install(TARGETS onert-minimal-app DESTINATION bin)
#include "nnfw.h"
#include <vector>
+#include <iostream>
uint64_t num_elems(const nnfw_tensorinfo *ti)
{
nnfw_close_session(session);
+ std::cout << "nnpackage " << argv[1] << " runs successfully." << std::endl;
return 0;
}
// Compile
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, graph);
- auto compiler = new onert::compiler::Compiler{subgs};
- executors = compiler->compile();
- delete compiler;
+ onert::compiler::Compiler compiler{subgs};
+ executors = compiler.compile();
}
public:
float output_buffer[4] = {};
const float output_expected[4] = {5, -2, 0, -1};
- auto execution = new onert::exec::Execution(executors);
+ onert::exec::Execution execution{executors};
- execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution->execute();
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.execute();
for (auto i = 0; i < 4; i++)
{
EXPECT_EQ(output_buffer[i], output_expected[i]);
}
-
- delete execution;
}
TEST(ExecInstance, twoCompile)
auto mockup = CompiledMockUpModel();
auto graph = mockup.graph;
auto executors1 = mockup.executors;
- auto execution1 = new onert::exec::Execution(executors1);
+ onert::exec::Execution execution1{executors1};
auto input1 = IOIndex{0};
auto input2 = IOIndex{1};
float exe1_output_buffer[4] = {};
const float exe1_output_expected[4] = {5, -2, 0, -1};
- execution1->setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1->setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1->setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
// Make new executor: compile again
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, graph);
- auto compiler = new onert::compiler::Compiler{subgs};
- std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler->compile();
- auto execution2 = new onert::exec::Execution(executors2);
+ onert::compiler::Compiler compiler{subgs};
+ std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler.compile();
+ onert::exec::Execution execution2{executors2};
const float exe2_input1_buffer[4] = {2, 1, -2, 0};
const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
float exe2_output_buffer[4] = {};
const float exe2_output_expected[4] = {2, 5, -2, 7};
- execution2->setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2->setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2->setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
- execution1->execute();
- execution2->execute();
+ execution1.execute();
+ execution2.execute();
for (auto i = 0; i < 4; i++)
{
EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
}
-
- delete compiler;
- delete execution1;
- delete execution2;
}
// Support two initialized execution instance then ordered execution
const float exe1_output_expected[4] = {5, -2, 0, -1};
const float exe2_output_expected[4] = {2, 5, -2, 7};
- auto execution1 = new onert::exec::Execution(executors);
- execution1->setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1->setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1->setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
+ onert::exec::Execution execution1{executors};
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
const float exe2_input1_buffer[4] = {2, 1, -2, 0};
const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
float exe2_output_buffer[4] = {};
// Make new execution
- auto execution2 = new onert::exec::Execution(executors);
- execution2->setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2->setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2->setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
+ onert::exec::Execution execution2{executors};
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
- execution1->execute();
- execution2->execute();
+ execution1.execute();
+ execution2.execute();
for (auto i = 0; i < 4; i++)
{
EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
}
-
- delete execution1;
- delete execution2;
}
class Inference
auto input2 = IOIndex{1};
auto output1 = IOIndex{0};
- auto execution = new onert::exec::Execution(_executors);
- execution->setInput(input1, reinterpret_cast<const void *>(_input1), 16);
- execution->setInput(input2, reinterpret_cast<const void *>(_input2), 16);
- execution->setOutput(output1, reinterpret_cast<void *>(_output), 16);
+ onert::exec::Execution execution{_executors};
+ execution.setInput(input1, reinterpret_cast<const void *>(_input1), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(_input2), 16);
+ execution.setOutput(output1, reinterpret_cast<void *>(_output), 16);
- execution->execute();
-
- delete execution;
+ execution.execute();
}
private:
float output_buffer[4] = {};
const float output_expected[4] = {5, -2, 0, -1};
- auto execution = new onert::exec::Execution(executors);
+ onert::exec::Execution execution{executors};
- execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution->startExecute();
- execution->waitFinish();
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.startExecute();
+ execution.waitFinish();
for (auto i = 0; i < 4; i++)
{
EXPECT_EQ(output_buffer[i], output_expected[i]);
}
-
- delete execution;
}
} // namespace
ASSERT_EQ(verifier.verify(graph), true);
// Check def
- ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(mocknode_index1), true);
- ASSERT_EQ(graph.operands().at(operand_index2).getDef().contains(mocknode_index2), true);
- ASSERT_EQ(graph.operands().at(output_operand).getDef().contains(multiinput_index), true);
+ ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
+ ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
+ ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
- ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(mocknode_index2), false);
- ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(multiinput_index), false);
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
// Check use
ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
add_nnfw_custom_op_app(FillFrom_runner
SOURCES FillFrom_runner.cc
KERNELS FillFrom)
-install(TARGETS FillFrom_runner DESTINATION tests)
-install(DIRECTORY nnpkgs/FillFrom DESTINATION tests/nnpkgs)
-install_nnfw_custom_op_kernel(FillFrom tests/nnpkgs/FillFrom)
+install(TARGETS FillFrom_runner DESTINATION test)
+install(DIRECTORY nnpkgs/FillFrom DESTINATION test/nnpkgs)
+install_nnfw_custom_op_kernel(FillFrom test/nnpkgs/FillFrom)
*/
#include "nnfw.h"
-#include "nnfw_dev.h"
+#include "nnfw_experimental.h"
#include <cassert>
#include <iostream>
* limitations under the License.
*/
-#include "nnfw_dev.h"
+#include "nnfw_experimental.h"
#include "flatbuffers/flexbuffers.h"
GeneratedTests.cast_float16_to_quant8_overflow
GeneratedTests.cast_float32_to_float16
GeneratedTests.cast_float32_to_float16_relaxed
+GeneratedTests.cast_float32_to_int32_nnfw
GeneratedTests.cast_int32_to_float16
-GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
GeneratedTests.concat_dynamic_nnfw
GeneratedTests.conv_dynamic_nnfw
GeneratedTests.gather_float16_8
GeneratedTests.greater_dynamic_float_nnfw
GeneratedTests.greater_equal_dynamic_float_nnfw
+GeneratedTests.l2_normalization_quant8_nnfw
GeneratedTests.less_dynamic_float_nnfw
GeneratedTests.less_equal_dynamic_float_nnfw
GeneratedTests.log_4D_float_nnfw
GeneratedTests.one_hot_ex_dynamic_nnfw
GeneratedTests.pack_ex_dynamic_nnfw
GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
GeneratedTests.pow_2D_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw_2
GeneratedTests.pow_broadcast_float_nnfw_3
GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.range_ex_float_1
GeneratedTests.range_ex_float_1_all_constant_inputs
GeneratedTests.range_ex_float_1_dynamic_nnfw
GeneratedTests.softmax_dynamic_nnfw
GeneratedTests.space_to_batch_dynamic_float_nnfw
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
GeneratedTests.sqrt_
GeneratedTests.squared_difference_ex_dynamic_nnfw
GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
GeneratedTests.strided_slice_dynamic_nnfw
GeneratedTests.sub_dynamic_nnfw
GeneratedTests.sub_v1_2_zero_sized
GeneratedTests.cast_float16_to_quant8_overflow
GeneratedTests.cast_float32_to_float16
GeneratedTests.cast_float32_to_float16_relaxed
-GeneratedTests.cast_float32_to_quant8_overflow
-GeneratedTests.cast_float32_to_quant8_overflow_relaxed
GeneratedTests.cast_int32_to_float16
-GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
GeneratedTests.concat_dynamic_nnfw
GeneratedTests.conv_dynamic_nnfw
GeneratedTests.greater_dynamic_float_nnfw
GeneratedTests.greater_equal_boolean
GeneratedTests.greater_equal_dynamic_float_nnfw
+GeneratedTests.l2_normalization_quant8_nnfw
GeneratedTests.less_boolean
GeneratedTests.less_dynamic_float_nnfw
GeneratedTests.less_equal_dynamic_float_nnfw
GeneratedTests.one_hot_ex_dynamic_nnfw
GeneratedTests.pack_ex_dynamic_nnfw
GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
GeneratedTests.pow_2D_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw_2
GeneratedTests.pow_broadcast_float_nnfw_3
GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.range_ex_float_1
GeneratedTests.range_ex_float_1_all_constant_inputs
GeneratedTests.range_ex_float_1_dynamic_nnfw
GeneratedTests.space_to_batch_quant8_2_nnfw
GeneratedTests.space_to_batch_quant8_3
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
GeneratedTests.sqrt_
GeneratedTests.squared_difference_ex_dynamic_nnfw
GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
GeneratedTests.strided_slice_dynamic_nnfw
GeneratedTests.sub_dynamic_nnfw
GeneratedTests.sub_v1_2_zero_sized
GeneratedTests.abs_
-GeneratedTests.batch_to_space
-GeneratedTests.batch_to_space_float_1
-GeneratedTests.batch_to_space_quant8_1
GeneratedTests.cast_float16_to_float16
GeneratedTests.cast_float16_to_float32
GeneratedTests.cast_float16_to_float32_relaxed
GeneratedTests.hashtable_lookup_float
GeneratedTests.hashtable_lookup_float_4D_nnfw
GeneratedTests.hashtable_lookup_quant8
-GeneratedTests.l2_normalization
-GeneratedTests.l2_normalization_2
-GeneratedTests.l2_normalization_large
GeneratedTests.l2_pool_float
GeneratedTests.l2_pool_float_2
GeneratedTests.l2_pool_float_large
GeneratedTests.neg
GeneratedTests.neg_3D_int_nnfw
GeneratedTests.neg_4D_int_nnfw
-GeneratedTests.pad_quant8_nnfw
GeneratedTests.prelu
GeneratedTests.prelu_broadcast_float_1_nnfw
GeneratedTests.prelu_broadcast_quant8_1_nnfw
GeneratedTests.prelu_weight_as_input_quant8_2
GeneratedTests.prelu_weight_as_input_quant8_3
GeneratedTests.prelu_weight_as_input_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.reduce_max_quant8
GeneratedTests.reduce_max_quant8_1_nnfw
GeneratedTests.reduce_max_quant8_2
GeneratedTests.relu1_float_2
GeneratedTests.relu1_quant8_1
GeneratedTests.relu1_quant8_2
-GeneratedTests.relu6_float_1
-GeneratedTests.relu6_float_2
GeneratedTests.relu6_quant8_1
GeneratedTests.relu6_quant8_2
GeneratedTests.relu_quant8_1
GeneratedTests.relu_quant8_2
-GeneratedTests.resize_bilinear
-GeneratedTests.resize_bilinear_2
GeneratedTests.rnn
GeneratedTests.rnn_state
GeneratedTests.rsqrt
GeneratedTests.select_v1_2_two_dim_quant8
GeneratedTests.slice_5
GeneratedTests.slice_6
-GeneratedTests.slice_7
GeneratedTests.slice_8
GeneratedTests.slice_zero_sized
GeneratedTests.slice_zero_sized_quant8
-GeneratedTests.space_to_depth_float_1
-GeneratedTests.space_to_depth_float_2
-GeneratedTests.space_to_depth_float_3
-GeneratedTests.space_to_depth_quant8_1
-GeneratedTests.space_to_depth_quant8_2
GeneratedTests.sqrt_
GeneratedTests.sqrt_1D_float_nnfw
GeneratedTests.sqrt_2D_float_nnfw
GeneratedTests.cast_float16_to_quant8_overflow
GeneratedTests.cast_float32_to_float16
GeneratedTests.cast_float32_to_float16_relaxed
+GeneratedTests.cast_float32_to_int32_nnfw
GeneratedTests.cast_int32_to_float16
-GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
GeneratedTests.concat_dynamic_nnfw
GeneratedTests.conv_dynamic_nnfw
GeneratedTests.gather_float16_8
GeneratedTests.greater_dynamic_float_nnfw
GeneratedTests.greater_equal_dynamic_float_nnfw
+GeneratedTests.l2_normalization_quant8_nnfw
GeneratedTests.less_dynamic_float_nnfw
GeneratedTests.less_equal_dynamic_float_nnfw
GeneratedTests.log_4D_float_nnfw
GeneratedTests.one_hot_ex_dynamic_nnfw
GeneratedTests.pack_ex_dynamic_nnfw
GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
GeneratedTests.pow_2D_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw_2
GeneratedTests.pow_broadcast_float_nnfw_3
GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.range_ex_float_1
GeneratedTests.range_ex_float_1_all_constant_inputs
GeneratedTests.range_ex_float_1_dynamic_nnfw
GeneratedTests.softmax_dynamic_nnfw
GeneratedTests.space_to_batch_dynamic_float_nnfw
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
GeneratedTests.sqrt_
GeneratedTests.squared_difference_ex_dynamic_nnfw
GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
GeneratedTests.strided_slice_dynamic_nnfw
GeneratedTests.sub_dynamic_nnfw
GeneratedTests.sub_v1_2_zero_sized
GeneratedTests.cast_float16_to_quant8_overflow
GeneratedTests.cast_float32_to_float16
GeneratedTests.cast_float32_to_float16_relaxed
-GeneratedTests.cast_float32_to_quant8_overflow
-GeneratedTests.cast_float32_to_quant8_overflow_relaxed
GeneratedTests.cast_int32_to_float16
-GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
GeneratedTests.concat_dynamic_nnfw
GeneratedTests.conv_dynamic_nnfw
GeneratedTests.greater_equal_boolean
GeneratedTests.greater_equal_dynamic_float_nnfw
GeneratedTests.less_boolean
+GeneratedTests.l2_normalization_quant8_nnfw
GeneratedTests.less_dynamic_float_nnfw
GeneratedTests.less_equal_dynamic_float_nnfw
GeneratedTests.log_4D_float_nnfw
GeneratedTests.one_hot_ex_dynamic_nnfw
GeneratedTests.pack_ex_dynamic_nnfw
GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
GeneratedTests.pow_2D_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw_2
GeneratedTests.pow_broadcast_float_nnfw_3
GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.range_ex_float_1
GeneratedTests.range_ex_float_1_all_constant_inputs
GeneratedTests.range_ex_float_1_dynamic_nnfw
GeneratedTests.softmax_dynamic_nnfw
GeneratedTests.space_to_batch_dynamic_float_nnfw
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
GeneratedTests.sqrt_
GeneratedTests.squared_difference_ex_dynamic_nnfw
GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
GeneratedTests.strided_slice_dynamic_nnfw
GeneratedTests.sub_dynamic_nnfw
GeneratedTests.sub_v1_2_zero_sized
GeneratedTests.abs_
-GeneratedTests.batch_to_space
-GeneratedTests.batch_to_space_float_1
-GeneratedTests.batch_to_space_quant8_1
GeneratedTests.cast_float16_to_float16
GeneratedTests.cast_float16_to_float32
GeneratedTests.cast_float16_to_float32_relaxed
GeneratedTests.hashtable_lookup_float
GeneratedTests.hashtable_lookup_float_4D_nnfw
GeneratedTests.hashtable_lookup_quant8
-GeneratedTests.l2_normalization
-GeneratedTests.l2_normalization_2
-GeneratedTests.l2_normalization_large
GeneratedTests.l2_pool_float
GeneratedTests.l2_pool_float_2
GeneratedTests.l2_pool_float_large
GeneratedTests.neg
GeneratedTests.neg_3D_int_nnfw
GeneratedTests.neg_4D_int_nnfw
-GeneratedTests.pad_quant8_nnfw
GeneratedTests.prelu
GeneratedTests.prelu_broadcast_float_1_nnfw
GeneratedTests.prelu_broadcast_quant8_1_nnfw
GeneratedTests.prelu_weight_as_input_quant8_2
GeneratedTests.prelu_weight_as_input_quant8_3
GeneratedTests.prelu_weight_as_input_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.reduce_max_quant8
GeneratedTests.reduce_max_quant8_1_nnfw
GeneratedTests.reduce_max_quant8_2
GeneratedTests.relu1_float_2
GeneratedTests.relu1_quant8_1
GeneratedTests.relu1_quant8_2
-GeneratedTests.relu6_float_1
-GeneratedTests.relu6_float_2
GeneratedTests.relu6_quant8_1
GeneratedTests.relu6_quant8_2
GeneratedTests.relu_quant8_1
GeneratedTests.relu_quant8_2
-GeneratedTests.resize_bilinear
-GeneratedTests.resize_bilinear_2
GeneratedTests.rnn
GeneratedTests.rnn_state
GeneratedTests.rsqrt
GeneratedTests.select_v1_2_two_dim_quant8
GeneratedTests.slice_5
GeneratedTests.slice_6
-GeneratedTests.slice_7
GeneratedTests.slice_8
GeneratedTests.slice_zero_sized
GeneratedTests.slice_zero_sized_quant8
-GeneratedTests.space_to_depth_float_1
-GeneratedTests.space_to_depth_float_2
-GeneratedTests.space_to_depth_float_3
-GeneratedTests.space_to_depth_quant8_1
-GeneratedTests.space_to_depth_quant8_2
GeneratedTests.sqrt_
GeneratedTests.sqrt_1D_float_nnfw
GeneratedTests.sqrt_2D_float_nnfw
GeneratedTests.l2_normalization
GeneratedTests.l2_normalization_2
GeneratedTests.l2_normalization_large
+GeneratedTests.l2_normalization_quant8_nnfw
GeneratedTests.l2_pool_float
GeneratedTests.l2_pool_float_2
GeneratedTests.l2_pool_float_large
GeneratedTests.pack_ex_dynamic_nnfw
GeneratedTests.pad_dynamic_nnfw
GeneratedTests.pad_quant8_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
GeneratedTests.pow_2D_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw_2
GeneratedTests.prelu_weight_as_input_quant8_2
GeneratedTests.prelu_weight_as_input_quant8_3
GeneratedTests.prelu_weight_as_input_quant8_4
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.range_ex_float_1
GeneratedTests.range_ex_float_1_all_constant_inputs
GeneratedTests.range_ex_float_1_dynamic_nnfw
GeneratedTests.reshape_dynamic_nnfw
GeneratedTests.resize_bilinear
GeneratedTests.resize_bilinear_2
+GeneratedTests.resize_bilinear_quant8_nnfw
GeneratedTests.reverse_ex_1d
GeneratedTests.reverse_ex_3d
GeneratedTests.reverse_ex_dynamic_1D
GeneratedTests.split_quant8_2_relaxed
GeneratedTests.split_quant8_3
GeneratedTests.split_quant8_4
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
GeneratedTests.sqrt_
GeneratedTests.sqrt_1D_float_nnfw
GeneratedTests.sqrt_2D_float_nnfw
GeneratedTests.squeeze_float_1_relaxed
GeneratedTests.squeeze_quant8_1
GeneratedTests.squeeze_relaxed
+GeneratedTests.stateless_random_uniform_ex_nnfw
GeneratedTests.strided_slice
GeneratedTests.strided_slice_dynamic_nnfw
GeneratedTests.strided_slice_float_1
GeneratedTests.abs_
-GeneratedTests.batch_to_space
-GeneratedTests.batch_to_space_float_1
-GeneratedTests.batch_to_space_quant8_1
GeneratedTests.cast_float16_to_float16
GeneratedTests.cast_float16_to_float32
GeneratedTests.cast_float16_to_float32_relaxed
GeneratedTests.hashtable_lookup_float
GeneratedTests.hashtable_lookup_float_4D_nnfw
GeneratedTests.hashtable_lookup_quant8
-GeneratedTests.l2_normalization
-GeneratedTests.l2_normalization_2
-GeneratedTests.l2_normalization_large
GeneratedTests.l2_pool_float
GeneratedTests.l2_pool_float_2
GeneratedTests.l2_pool_float_large
GeneratedTests.neg
GeneratedTests.neg_3D_int_nnfw
GeneratedTests.neg_4D_int_nnfw
-GeneratedTests.pad_quant8_nnfw
GeneratedTests.prelu
GeneratedTests.prelu_broadcast_float_1_nnfw
GeneratedTests.prelu_broadcast_quant8_1_nnfw
GeneratedTests.prelu_weight_as_input_quant8_2
GeneratedTests.prelu_weight_as_input_quant8_3
GeneratedTests.prelu_weight_as_input_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.reduce_max_quant8
GeneratedTests.reduce_max_quant8_1_nnfw
GeneratedTests.reduce_max_quant8_2
GeneratedTests.relu1_float_2
GeneratedTests.relu1_quant8_1
GeneratedTests.relu1_quant8_2
-GeneratedTests.relu6_float_1
-GeneratedTests.relu6_float_2
GeneratedTests.relu6_quant8_1
GeneratedTests.relu6_quant8_2
GeneratedTests.relu_quant8_1
GeneratedTests.relu_quant8_2
-GeneratedTests.resize_bilinear
-GeneratedTests.resize_bilinear_2
GeneratedTests.rnn
GeneratedTests.rnn_state
GeneratedTests.rsqrt
GeneratedTests.select_v1_2_two_dim_quant8
GeneratedTests.slice_5
GeneratedTests.slice_6
-GeneratedTests.slice_7
GeneratedTests.slice_8
GeneratedTests.slice_zero_sized
GeneratedTests.slice_zero_sized_quant8
-GeneratedTests.space_to_depth_float_1
-GeneratedTests.space_to_depth_float_2
-GeneratedTests.space_to_depth_float_3
-GeneratedTests.space_to_depth_quant8_1
-GeneratedTests.space_to_depth_quant8_2
GeneratedTests.sqrt_
GeneratedTests.sqrt_1D_float_nnfw
GeneratedTests.sqrt_2D_float_nnfw
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{8}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{8}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 8)
+
+i2 = Output("op2", "TENSOR_FLOAT32", "{1}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{1}")
+i4 = Output("op4", "TENSOR_FLOAT32", "{1}")
+i5 = Output("op5", "TENSOR_FLOAT32", "{1}")
+i6 = Output("op6", "TENSOR_FLOAT32", "{1}")
+i7 = Output("op7", "TENSOR_FLOAT32", "{1}")
+i8 = Output("op8", "TENSOR_FLOAT32", "{1}")
+i9 = Output("op9", "TENSOR_FLOAT32", "{1}")
+
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3, i4, i5, i6, i7, i8, i9])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+ size_splits:
+ [1, 1, 1, 1, 1, 1, 1, 1],
+ split_dim:
+ [0]
+ }
+
+output0 = {
+ i2: # output 0
+ [1.0],
+ i3: # output 1
+ [2.0],
+ i4: # output 2
+ [3.0],
+ i5: # output 3
+ [4.0],
+ i6: # output 4
+ [5.0],
+ i7: # output 5
+ [6.0],
+ i8: # output 6
+ [7.0],
+ i9: # output 7
+ [8.0]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+input0 = Input("input0", "TENSOR_FLOAT32", "{12}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{3}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 3)
+
+output0 = Output("output0", "TENSOR_FLOAT32", "{3}")
+output1 = Output("output1", "TENSOR_FLOAT32", "{5}")
+output2 = Output("output2", "TENSOR_FLOAT32", "{4}")
+
+model = Model().Operation("SPLIT_V_EX", input0, size_splits, split_dim, num_splits).To((output0, output1, output2))
+
+# Example 1.
+input_dict = {
+ input0: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0],
+ size_splits: [3, 5, 4],
+ split_dim: [0]
+}
+output_dict = {
+ output0: [1.0, 2.0, 3.0],
+ output1: [4.0, 5.0, 6.0, 7.0, 8.0],
+ output2: [9.0, 10.0, 11.0, 12.0]
+}
+
+Example((input_dict, output_dict))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{8}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{8}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 8)
+
+i2 = Output("op2", "TENSOR_INT32", "{1}")
+i3 = Output("op3", "TENSOR_INT32", "{1}")
+i4 = Output("op4", "TENSOR_INT32", "{1}")
+i5 = Output("op5", "TENSOR_INT32", "{1}")
+i6 = Output("op6", "TENSOR_INT32", "{1}")
+i7 = Output("op7", "TENSOR_INT32", "{1}")
+i8 = Output("op8", "TENSOR_INT32", "{1}")
+i9 = Output("op9", "TENSOR_INT32", "{1}")
+
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3, i4, i5, i6, i7, i8, i9])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8],
+ size_splits:
+ [1, 1, 1, 1, 1, 1, 1, 1],
+ split_dim:
+ [0]
+ }
+
+output0 = {
+ i2: # output 0
+ [1],
+ i3: # output 1
+ [2],
+ i4: # output 2
+ [3],
+ i5: # output 3
+ [4],
+ i6: # output 4
+ [5],
+ i7: # output 5
+ [6],
+ i8: # output 6
+ [7],
+ i9: # output 7
+ [8]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_FLOAT32", "{1,2,2,2}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{1,2,2,2}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [0]
+ }
+
+output0 = {
+ i2: # output 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+ i3: # output 1
+ [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_FLOAT32", "{2,2,2,1}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{2,2,2,1}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [3]}
+
+output0 = {
+ i2: # output 0
+ [1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0],
+ i3: # output 1
+ [2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_FLOAT32", "{1,2,2,2}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{1,2,2,2}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [-4]
+ }
+
+output0 = {
+ i2: # output 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+ i3: # output 1
+ [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{4,1,1,8}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{3}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 3)
+
+i2 = Output("op2", "TENSOR_FLOAT32", "{4,1,1,2}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{4,1,1,4}")
+i4 = Output("op4", "TENSOR_FLOAT32", "{4,1,1,2}")
+
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3, i4])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0],
+ size_splits:
+ [2,4,2],
+ split_dim:
+ [3]
+ }
+
+output0 = {
+ i2: # output 0
+ [1.0, 2.0, 9.0, 10.0, 17.0, 18.0, 25.0, 26.0],
+ i3: # output 1
+ [3.0, 4.0, 5.0, 6.0, 11.0, 12.0, 13.0, 14.0, 19.0, 20.0, 21.0, 22.0, 27.0, 28.0, 29.0, 30.0],
+ i4: [7.0, 8.0, 15.0, 16.0, 23.0, 24.0, 31.0, 32.0]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_INT32", "{1,2,2,2}")
+i3 = Output("op3", "TENSOR_INT32", "{1,2,2,2}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [0]}
+
+output0 = {
+ i2: # output 0
+ [1, 2, 3, 4, 5, 6, 7, 8],
+ i3: # output 1
+ [9, 10, 11, 12, 13, 14, 15, 16]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_INT32", "{2,1,2,2}")
+i3 = Output("op3", "TENSOR_INT32", "{2,1,2,2}")
+
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [1]}
+
+output0 = {
+ i2: # output 0
+ [1, 2, 3, 4, 9, 10, 11, 12],
+ i3: # output 1
+ [5, 6, 7, 8, 13, 14, 15, 16]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_INT32", "{2,2,1,2}")
+i3 = Output("op3", "TENSOR_INT32", "{2,2,1,2}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [2]}
+
+output0 = {
+ i2: # output 0
+ [1, 2, 5, 6, 9, 10, 13, 14],
+ i3: # output 1
+ [3, 4, 7, 8, 11, 12, 15, 16]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_INT32", "{2,2,2,1}")
+i3 = Output("op3", "TENSOR_INT32", "{2,2,2,1}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [3]}
+
+output0 = {
+ i2: # output 0
+ [1, 3, 5, 7, 9, 11, 13, 15],
+ i3: # output 1
+ [2, 4, 6, 8, 10, 12, 14, 16]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+#
+# Copyright (C) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+model = Model()
+
+i1 = Input("input1", "TENSOR_INT32", "{1}")
+i2 = Input("input2", "TENSOR_INT32", "{2}")
+
+o1 = Output("output0", "TENSOR_FLOAT32", "{10}")
+
+model = model.Operation("STATELESS_RANDOM_UNIFORM_EX", i1, i2).To(o1)
+
+# Example.
+input0 = {
+ i1 : [10], #input1
+ i2 : [1, 1] #input2
+}
+
+output0 = {
+ o1: [0.09827709, 0.14063823, 0.4553436,
+ 0.10658443, 0.2075988, 0.30841374,
+ 0.7489233, 0.90613365, 0.63342273,
+ 0.37854457]
+}
+
+Example((input0, output0))
--- /dev/null
+#
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+model = Model()
+in0 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 1, 1, 3}, 2e-7, 128")
+out0 = Output("op2", "TENSOR_QUANT8_ASYMM", "{1, 1, 1, 3}, 2e-7, 128")
+model = model.Operation("L2_NORMALIZATION", in0).To(out0)
+
+# Example 1. Input in operand 0,
+input0 = {in0: # input 0
+ [0, 5, 12]}
+output0 = {out0: # output 0
+ [51, 54, 58]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.8, 5")
+i2 = Output("op2", "TENSOR_QUANT8_ASYMM", "{1, 3, 3, 1}, 0.8, 5")
+w = Int32Scalar("width", 3)
+h = Int32Scalar("height", 3)
+model = model.Operation("RESIZE_BILINEAR", i1, w, h).To(i2)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 1, 2, 2]}
+output0 = {i2: # output 0
+ [1, 1, 1,
+ 2, 2, 2,
+ 2, 2, 2]}
+
+# Instantiate an example
+Example((input0, output0))
target_link_libraries(${RUNTIME_NNFW_API_TEST} nnfw-dev)
target_link_libraries(${RUNTIME_NNFW_API_TEST} gtest gmock)
target_link_libraries(${RUNTIME_NNFW_API_TEST} ${LIB_PTHREAD} dl)
+target_link_libraries(${RUNTIME_NNFW_API_TEST} circle_schema)
install(TARGETS ${RUNTIME_NNFW_API_TEST} DESTINATION unittest_standalone)
- Validation Tests (fixture format `ValidationTest???`)
- Basic positive/negative tests with simple nnpackages
+- Generated Model Tests (fixture format `GenModelTest`)
+  - One-time inference tests with a variety of generated models (a rough usage sketch follows this list)
- Regression Tests (fixture format `RegressionTest`, test format `GitHub###`)
- When you see bugs/crashes while using those API
- Must refer a github issue
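As a rough illustration (not part of the patch), a GenModelTest case would assemble its model with the CircleGen helper added below, using only the methods that appear in this excerpt; how the fixture then feeds the serialized buffer into an nnfw session is not shown here, and the single-ADD model and its shapes are purely illustrative:

CircleGen cgen;
int in1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
int in2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
cgen.addOperatorAdd({{in1, in2}, {out}}, circle::ActivationFunctionType::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in1, in2}, {out});
CircleBuffer cbuf = cgen.finish();
// cbuf.buffer() and cbuf.size() now describe a serialized circle model holding one ADD operator.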
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_API_TEST_CIRCLE_GEN_H__
+#define __NNFW_API_TEST_CIRCLE_GEN_H__
+
+#include <circle_schema_generated.h>
+
+#include <vector>
+
+/**
+ * @brief Class for storing flatbuffer buffer
+ *
+ * This is a simple wrapper for a finished FlatBufferBuilder. It owns the buffer and a user can
+ * get the buffer pointer and size.
+ */
+class CircleBuffer
+{
+public:
+ CircleBuffer() = default;
+ explicit CircleBuffer(flatbuffers::FlatBufferBuilder &&fbb) : _fbb{std::move(fbb)}
+ {
+ _fbb.Finished(); // The build must have been finished, so check that here
+ }
+
+ uint8_t *buffer() { return _fbb.GetBufferPointer(); }
+ size_t size() { return _fbb.GetSize(); }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+};
+
+/**
+ * @brief Circle flatbuffer file generator
+ *
+ * This is a helper class for generating a circle file.
+ *
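+ * A rough usage sketch (using the add-operator methods defined below):
+ * @code
+ *   CircleGen cgen;
+ *   int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ *   int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ *   cgen.addOperatorAdd({{in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ *   cgen.setInputsAndOutputs({in}, {out});
+ *   CircleBuffer cbuf = cgen.finish();
+ * @endcode
+ *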
+ */
+class CircleGen
+{
+public:
+ struct TensorParams
+ {
+ std::vector<int32_t> shape;
+ circle::TensorType tensor_type = circle::TensorType::TensorType_FLOAT32;
+ uint32_t buffer = 0;
+ std::string name;
+ };
+
+ struct OperatorParams
+ {
+ std::vector<int32_t> inputs;
+ std::vector<int32_t> outputs;
+ int version = 1;
+ };
+
+public:
+ CircleGen()
+ {
+ // 0th buffer is always the empty buffer for non-const tensors
+ addBuffer(nullptr, 0);
+ }
+
+ template <typename T> uint32_t addBuffer(const std::vector<T> &buf_vec)
+ {
+ auto buf = reinterpret_cast<const uint8_t *>(buf_vec.data());
+ auto size = buf_vec.size() * sizeof(T);
+ return addBuffer(buf, size);
+ }
+
+ uint32_t addBuffer(const uint8_t *buf, size_t size)
+ {
+ uint32_t ind = _buffers.size();
+ _buffers.emplace_back(buildBuffer(buf, size));
+ return ind;
+ }
+
+ uint32_t addTensor(const TensorParams &params)
+ {
+ int ind = _tensors.size();
+ _tensors.emplace_back(buildTensor(params));
+ return ind;
+ }
+
+ void setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs)
+ {
+ _inputs = inputs;
+ _outputs = outputs;
+ }
+
+ CircleBuffer finish()
+ {
+ // TODO Support multiple subgraphs; for now, only a single-subgraph model is supported.
+ std::vector<flatbuffers::Offset<circle::SubGraph>> subgraphs{buildSubGraph()};
+ auto model =
+ circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
+ _fbb.Finish(model);
+ return CircleBuffer{std::move(_fbb)};
+ }
+
+ // ===== Add Operator methods begin =====
+
+ uint32_t addOperatorAdd(const OperatorParams &params, circle::ActivationFunctionType actfn)
+ {
+ auto options = circle::CreateAddOptions(_fbb, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_ADD,
+ circle::BuiltinOptions_AddOptions, options);
+ }
+
+ uint32_t addOperatorAveragePool2D(const OperatorParams &params, circle::Padding padding,
+ int stride_w, int stride_h, int filter_w, int filter_h,
+ circle::ActivationFunctionType actfn)
+ {
+ auto options =
+ circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
+ .Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_AVERAGE_POOL_2D,
+ circle::BuiltinOptions_Pool2DOptions, options);
+ }
+
+ // NOTE Please add addOperator functions ABOVE this line
+ //
+ // % How to add a new addOperatorXXX function
+ // 0. Copy code from one of the existing addOperatorXXX functions
+ // 1. Change the function signature (need BuiltinOperator params)
+ // 2. Change enum BuiltinOperator
+ // 3. Change enum BuiltinOptions
+ // 4. Change CreateXXXOptions accordingly
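+ //
+ // For instance, a hypothetical operator without options could look like the sketch below
+ // (RELU is not actually wired up in this helper; it only illustrates the steps above):
+ //
+ //   uint32_t addOperatorRelu(const OperatorParams &params)
+ //   {
+ //     return addOperatorWithOptions(params, circle::BuiltinOperator_RELU,
+ //                                   circle::BuiltinOptions_NONE, 0);
+ //   }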
+
+ // ===== Add Operator methods end =====
+
+private:
+ uint32_t addOperatorWithOptions(const OperatorParams &params, circle::BuiltinOperator opcode,
+ circle::BuiltinOptions options_type,
+ flatbuffers::Offset<void> options)
+ {
+ uint32_t opcode_ind = addOperatorCode(opcode);
+ auto op = circle::CreateOperatorDirect(_fbb, opcode_ind, &params.inputs, &params.outputs,
+ options_type, options);
+
+ uint32_t ind = _operators.size();
+ _operators.emplace_back(op);
+ return ind;
+ }
+
+ uint32_t addOperatorCode(circle::BuiltinOperator opcode)
+ {
+ // TODO If the same OperatorCode is registered already, just return it
+ uint32_t ind = _opcodes.size();
+ _opcodes.emplace_back(circle::CreateOperatorCode(_fbb, opcode));
+ return ind;
+ }
+
+ flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size)
+ {
+ if (buf == nullptr && size == 0)
+ return circle::CreateBuffer(_fbb);
+ auto buffer = _fbb.CreateVector(buf, size);
+ return circle::CreateBuffer(_fbb, buffer);
+ }
+
+ flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params)
+ {
+ auto shape = _fbb.CreateVector(params.shape);
+ auto name = _fbb.CreateString(params.name);
+ return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name,
+ 0 /* QuantParam */, false /* is_variable */, 0 /* sparsity */,
+ 0 /* shape_signature */);
+ }
+
+ flatbuffers::Offset<circle::SubGraph> buildSubGraph()
+ {
+ return circle::CreateSubGraphDirect(_fbb, &_tensors, &_inputs, &_outputs, &_operators, nullptr);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb{1024};
+ std::vector<flatbuffers::Offset<circle::Buffer>> _buffers;
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes;
+
+ // per-subgraph
+ std::vector<int> _inputs;
+ std::vector<int> _outputs;
+ std::vector<flatbuffers::Offset<circle::Tensor>> _tensors;
+ std::vector<flatbuffers::Offset<circle::Operator>> _operators;
+};
+
+#endif // __NNFW_API_TEST_CIRCLE_GEN_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <nnfw_internal.h>
+
+#include <fstream>
+
+#include "CircleGen.h"
+#include "fixtures.h"
+
+/**
+ * @brief Generated Model test fixture for a one time inference
+ *
+ * This fixture is for a one-time inference test with a variety of generated models.
+ * It is the user's responsibility to create @c _cbuf , @c _ref_inputs and @c _ref_outputs in the
+ * test body, which are the generated circle buffer, the model input data, and the expected
+ * output data, respectively.
+ * The rest (calling API functions for execution) is done by @c SetUp and @c TearDown .
+ *
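+ * A minimal sketch of a test body (the tests below are complete, working examples):
+ * @code
+ *   CircleGen cgen;
+ *   // ... build a model with cgen ...
+ *   _cbuf = cgen.finish();
+ *   _ref_inputs = {{...}};  // one vector per model input
+ *   _ref_outputs = {{...}}; // one vector per model output
+ * @endcode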
+ */
+class GenModelTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { NNFW_ENSURE_SUCCESS(nnfw_create_session(&_so.session)); }
+
+ void TearDown() override
+ {
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_so.session, _cbuf.buffer(), _cbuf.size()));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_so.session));
+
+ // In/Out buffer settings
+ {
+ uint32_t num_inputs;
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(_so.session, &num_inputs));
+ _so.inputs.resize(num_inputs);
+ for (uint32_t ind = 0; ind < _so.inputs.size(); ind++)
+ {
+ nnfw_tensorinfo ti;
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_so.session, ind, &ti));
+ uint64_t input_elements = num_elems(&ti);
+ _so.inputs[ind].resize(input_elements);
+
+ ASSERT_EQ(nnfw_set_input(_so.session, ind, ti.dtype, _so.inputs[ind].data(),
+ sizeof(float) * input_elements),
+ NNFW_STATUS_NO_ERROR);
+ }
+
+ uint32_t num_outputs;
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(_so.session, &num_outputs));
+ _so.outputs.resize(num_outputs);
+ for (uint32_t ind = 0; ind < _so.outputs.size(); ind++)
+ {
+ nnfw_tensorinfo ti;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_so.session, ind, &ti));
+ uint64_t output_elements = num_elems(&ti);
+ _so.outputs[ind].resize(output_elements);
+ ASSERT_EQ(nnfw_set_output(_so.session, ind, ti.dtype, _so.outputs[ind].data(),
+ sizeof(float) * output_elements),
+ NNFW_STATUS_NO_ERROR);
+ }
+ }
+
+ // Set input values, run, and check output values
+ {
+ ASSERT_EQ(_so.inputs.size(), _ref_inputs.size());
+ for (uint32_t i = 0; i < _so.inputs.size(); i++)
+ {
+ // Fill the values
+ ASSERT_EQ(_so.inputs[i].size(), _ref_inputs[i].size());
+ memcpy(_so.inputs[i].data(), _ref_inputs[i].data(), _so.inputs[i].size() * sizeof(float));
+ }
+
+ NNFW_ENSURE_SUCCESS(nnfw_run(_so.session));
+
+ ASSERT_EQ(_so.outputs.size(), _ref_outputs.size());
+ for (uint32_t i = 0; i < _so.outputs.size(); i++)
+ {
+ // Check output tensor values
+ auto &ref_output = _ref_outputs[i];
+ auto &output = _so.outputs[i];
+ ASSERT_EQ(output.size(), ref_output.size());
+ for (uint32_t e = 0; e < ref_output.size(); e++)
+ ASSERT_FLOAT_EQ(ref_output[e], output[e]);
+ }
+ }
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
+ }
+
+protected:
+ SessionObject _so;
+ CircleBuffer _cbuf;
+ std::vector<std::vector<float>> _ref_inputs;
+ std::vector<std::vector<float>> _ref_outputs;
+};
+
+TEST_F(GenModelTest, OneOp_Add_VarToConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 7, 4};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+ _cbuf = cgen.finish();
+
+ _ref_inputs = {{1, 3, 2, 4}};
+ _ref_outputs = {{6, 7, 9, 8}};
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+ _cbuf = cgen.finish();
+
+ _ref_inputs = {{1, 3, 2, 4}, {5, 4, 7, 4}};
+ _ref_outputs = {{6, 7, 9, 8}};
+}
+
+TEST_F(GenModelTest, OneOp_AvgPool2D)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ _cbuf = cgen.finish();
+
+ _ref_inputs = {{1, 3, 2, 4}};
+ _ref_outputs = {{2.5}};
+}
*/
#include <gtest/gtest.h>
-#include <nnfw_debug.h>
+#include <nnfw_internal.h>
#include "common.h"
#include "fixtures.h"
{
NNFW_STATUS res = nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_INT32, new_shape.data(),
sizeof(int) * new_shape.size());
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
res = nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output->data(),
sizeof(float) * actual_output_size);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
}
void prepare_and_set_input_output(const std::vector<int> &new_shape, int actual_output_size,
std::vector<float> *actual_output)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
NNFW_STATUS res = NNFW_STATUS_ERROR;
res = nnfw_prepare(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
set_input_output(new_shape, actual_output_size, actual_output);
// real test case should start from calling nnfw_run()
if (no_run_error)
{
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// output shape check
nnfw_tensorinfo info;
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &info), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &info));
ASSERT_EQ(info.rank, new_shape.size());
for (uint32_t d = 0; d < info.rank; ++d)
ASSERT_EQ(info.dims[d], new_shape[d]);
// Do inference
NNFW_STATUS res = nnfw_run(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// output value check
for (int i = 0; i < expected.size(); ++i)
TEST_F(TestDynamicTensorReshapeModelLoaded, reshape_multiple_executions)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
NNFW_STATUS res = nnfw_prepare(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
std::vector<int> new_shape;
std::vector<float> expected = {-1.5, -1.0, -0.5, 0.5, 1.0, 1.5};
TEST_F(TestDynamicTensorReshapeModelLoaded, neg_reshape_multiple_executions)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
NNFW_STATUS res = nnfw_prepare(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
std::vector<int> new_shape;
std::vector<float> expected = {-1.5, -1.0, -0.5, 0.5, 1.0, 1.5};
const std::vector<float> &input1,
std::vector<float> *actual_output, nnfw_tensorinfo input0_ti)
{
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &input0_ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input0.data(),
sizeof(float) * input0.size()),
*/
TEST_F(TestInputUnknownDimInputConcatModelLoaded, concat_input0_to_2x3)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
const std::vector<float> input0 = {1, 2, 3}; // of shape [1, 3]
const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
// input reshaping to [1, 3]
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 3}};
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
set_input_output(_session, input0, input1, actual_output);
// Do inference
NNFW_STATUS res = nnfw_run(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// output value check
for (int i = 0; i < expected.size(); ++i)
*/
TEST_F(TestInputUnknownDimInputConcatModelLoaded, neg_concat_input0_to_wrong_shape)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
const std::vector<float> input0 = {1, 2, 3}; // of shape [3, 1], wrong shape
const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
// input reshaping to [3, 1]
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {3, 1}};
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
}
TEST_F(TestDynamicTensorApplyTensorInfoBinaryOp, set_input_tensorinfo_after_compilation_add)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
// input reshaping to [2, 2, 3]
nnfw_tensorinfo input0_ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {2, 2, 3}};
std::vector<float> expected_output = {1.1 * 2, 2.1 * 2, 3.1 * 2, 4.1 * 2, 5.1 * 2, 6.1 * 2,
7.1 * 2, 8.1 * 2, 9.1 * 2, 10.1 * 2, 11.1 * 2, 12.1 * 2};
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &input0_ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
set_input_output(_session, input0, input1, actual_output);
// Do inference
NNFW_STATUS res = nnfw_run(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// output value check
for (int i = 0; i < expected_output.size(); ++i)
TEST_F(TestDynamicTensorApplyTensorInfoUnaryOp, set_input_tensorinfo_after_compilation_neg)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
nnfw_tensorinfo input0_ti_original = {NNFW_TYPE_TENSOR_FLOAT32, 2, {4, 4}};
expected_output[i] = -1 * input0[i];
}
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
// input shape check
{
nnfw_tensorinfo ti = {};
- ASSERT_EQ(nnfw_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(input0_ti_original, ti));
}
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &input0_ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
// input shape check
{
nnfw_tensorinfo ti = {};
- ASSERT_EQ(nnfw_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(input0_ti, ti));
}
// Do inference
NNFW_STATUS res = nnfw_run(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// output value check
for (int i = 0; i < expected_output.size(); ++i)
TEST_F(TestWhileDynamicModelLoaded, run_verify)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
std::vector<float> actual_output0(10);
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {1, 28, 28}};
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
set_input_output(_session, while_dynamic_input0, actual_output0);
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
nnfw_tensorinfo ti_output0_expected = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 10}};
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(ti, ti_output0_expected));
// output value check
TEST_F(TestWhileDynamicModelLoaded, neg_run_verify)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {1, 28, 28}};
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
// Insufficient size of output (10 or more is sufficient)
std::vector<float> actual_output0(9);
TEST_F(TestIfDynamicModelLoaded, run_verify)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
nnfw_tensorinfo ti_output0_expected = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 10}};
// Output tensor sizes are inferenced after `nnfw_prepare`
{
nnfw_tensorinfo ti;
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(ti, ti_output0_expected));
}
std::vector<float> actual_output0(10);
set_input_output(_session, if_dynamic_input0, actual_output0);
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
// Check output tensor sizes again
{
nnfw_tensorinfo ti;
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(ti, ti_output0_expected));
}
*/
#include <gtest/gtest.h>
-#include <nnfw_debug.h>
+#include <nnfw_internal.h>
#include "fixtures.h"
#include "NNPackages.h"
{
NNFW_STATUS res = NNFW_STATUS_ERROR;
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_set_config(_session, "EXECUTOR", "Linear"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "EXECUTOR", "Linear"));
// input and output values
const std::vector<float> input1 = {0, 1, 2, 3, 4, 5, 6, 7}; // of changed shape [4, 2]
res = nnfw_set_input_tensorinfo(_session, 0, &ti);
res = nnfw_prepare(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
nnfw_tensorinfo ti_input = {}; // Static inference result will be stored
nnfw_input_tensorinfo(_session, 0, &ti_input);
res = nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input1.data(),
sizeof(float) * input1.size());
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
res = nnfw_set_input(_session, 1, NNFW_TYPE_TENSOR_FLOAT32, input2.data(),
sizeof(float) * input2.size());
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
uint64_t output_num_elements = tensorInfoNumElements(ti_output);
ASSERT_EQ(output_num_elements, expected.size());
std::vector<float> actual_output(output_num_elements);
res = nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output.data(),
sizeof(float) * actual_output.size());
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// Do inference
res = nnfw_run(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// compare
for (int i = 0; i < expected.size(); ++i)
auto package_path = NNPackages::get().getModelAbsolutePath(NNPackages::ADD);
nnfw_session *session1 = nullptr;
- ASSERT_EQ(nnfw_create_session(&session1), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_load_model_from_file(session1, package_path.c_str()), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_set_available_backends(session1, "cpu;acl_cl;acl_neon"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(session1), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session1));
+ NNFW_ENSURE_SUCCESS(nnfw_load_model_from_file(session1, package_path.c_str()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session1, "cpu;acl_cl;acl_neon"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session1));
nnfw_session *session2 = nullptr;
- ASSERT_EQ(nnfw_create_session(&session2), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_load_model_from_file(session2, package_path.c_str()), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_set_available_backends(session2, "cpu"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(session2), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session2));
+ NNFW_ENSURE_SUCCESS(nnfw_load_model_from_file(session2, package_path.c_str()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session2, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session2));
- ASSERT_EQ(nnfw_close_session(session1), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_close_session(session2), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session1));
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session2));
}
using ValidationTestAddModelLoaded = ValidationTestModelLoaded<NNPackages::ADD>;
-TEST_F(ValidationTestAddModelLoaded, prepare_001)
-{
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
-}
+TEST_F(ValidationTestAddModelLoaded, prepare_001) { NNFW_ENSURE_SUCCESS(nnfw_prepare(_session)); }
TEST_F(ValidationTestAddModelLoaded, set_available_backends_001)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
}
TEST_F(ValidationTestAddModelLoaded, get_input_size)
{
uint32_t size = 0;
- ASSERT_EQ(nnfw_input_size(_session, &size), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(_session, &size));
ASSERT_EQ(size, 1);
}
TEST_F(ValidationTestAddModelLoaded, get_output_size)
{
uint32_t size = 0;
- ASSERT_EQ(nnfw_output_size(_session, &size), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(_session, &size));
ASSERT_EQ(size, 1);
}
TEST_F(ValidationTestAddModelLoaded, output_tensorinfo)
{
nnfw_tensorinfo tensor_info;
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &tensor_info), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &tensor_info));
ASSERT_EQ(tensor_info.rank, 1);
ASSERT_EQ(tensor_info.dims[0], 1);
}
-TEST_F(ValidationTestAddModelLoaded, neg_run_001)
+TEST_F(ValidationTestAddModelLoaded, neg_run)
{
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
+ // nnfw_prepare is not called
+ ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
}
-TEST_F(ValidationTestAddModelLoaded, neg_set_input_001)
+TEST_F(ValidationTestAddModelLoaded, neg_set_input)
{
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0), NNFW_STATUS_ERROR);
+ // nnfw_prepare is not called
+ ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
}
-TEST_F(ValidationTestAddModelLoaded, neg_set_output_001)
+TEST_F(ValidationTestAddModelLoaded, neg_set_output)
{
- ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0), NNFW_STATUS_ERROR);
+ // nnfw_prepare is not called
+ ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestAddModelLoaded, neg_get_input_size)
{
- ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
TEST_F(ValidationTestAddModelLoaded, neg_get_output_size)
{
- ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
TEST_F(ValidationTestAddModelLoaded, neg_load_model)
// load model twice
ASSERT_EQ(nnfw_load_model_from_file(
_session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
- NNFW_STATUS_ERROR);
+ NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestAddModelLoaded, neg_output_tensorinfo)
{
// tensor_info is null
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
{
SetInOutBuffers();
_input[0] = 3.0;
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
ASSERT_FLOAT_EQ(_output[0], 5.0);
}
{
SetInOutBuffers();
_input[0] = 4.0;
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
ASSERT_FLOAT_EQ(_output[0], 6.0);
_input[0] = 5.0f;
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
ASSERT_FLOAT_EQ(_output[0], 7.0);
}
{
SetInOutBuffers();
_input[0] = 3.0;
- ASSERT_EQ(nnfw_run_async(_session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session));
ASSERT_FLOAT_EQ(_output[0], 5.0);
}
TEST_F(ValidationTestAddSessionPrepared, get_input_size)
{
uint32_t size = 0;
- ASSERT_EQ(nnfw_input_size(_session, &size), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(_session, &size));
ASSERT_EQ(size, 1);
}
TEST_F(ValidationTestAddSessionPrepared, get_output_size)
{
uint32_t size = 0;
- ASSERT_EQ(nnfw_output_size(_session, &size), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(_session, &size));
ASSERT_EQ(size, 1);
}
TEST_F(ValidationTestAddSessionPrepared, output_tensorinfo)
{
nnfw_tensorinfo tensor_info;
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &tensor_info), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &tensor_info));
ASSERT_EQ(tensor_info.rank, 1);
ASSERT_EQ(tensor_info.dims[0], 1);
}
TEST_F(ValidationTestAddSessionPrepared, neg_await_after_sync_run)
{
SetInOutBuffers();
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_ERROR);
}
TEST_F(ValidationTestAddSessionPrepared, neg_await_twice)
{
SetInOutBuffers();
- ASSERT_EQ(nnfw_run_async(_session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session));
ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_ERROR);
}
TEST_F(ValidationTestAddSessionPrepared, neg_run_during_async_run)
{
SetInOutBuffers();
- ASSERT_EQ(nnfw_run_async(_session), NNFW_STATUS_NO_ERROR);
- EXPECT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
- ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
+ EXPECT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session));
}
TEST_F(ValidationTestAddSessionPrepared, neg_set_input_001)
TEST_F(ValidationTestAddSessionPrepared, neg_get_input_size)
{
- ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
TEST_F(ValidationTestAddSessionPrepared, neg_get_output_size)
{
- ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
TEST_F(ValidationTestAddSessionPrepared, neg_load_model)
// Load model twice
ASSERT_EQ(nnfw_load_model_from_file(
_session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
- NNFW_STATUS_ERROR);
+ NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestAddSessionPrepared, neg_prepare)
{
// Call Prepare twice
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
}
// TODO Validation check when "nnfw_run" is called without input & output tensor setting
TEST_F(ValidationTestFourAddModelsSetInput, run_001)
{
- ASSERT_EQ(nnfw_run(_objects[0].session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_run(_objects[1].session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_objects[0].session));
+ NNFW_ENSURE_SUCCESS(nnfw_run(_objects[1].session));
}
TEST_F(ValidationTestFourAddModelsSetInput, run_002)
while (rep--)
{
for (auto obj : _objects)
- ASSERT_EQ(nnfw_run(obj.session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(obj.session));
}
}
TEST_F(ValidationTestFourAddModelsSetInput, run_async)
{
for (auto obj : _objects)
- ASSERT_EQ(nnfw_run_async(obj.session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(obj.session));
for (auto obj : _objects)
- ASSERT_EQ(nnfw_await(obj.session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_await(obj.session));
}
TEST_F(ValidationTestSessionCreated, close_and_create_again)
{
- ASSERT_EQ(nnfw_close_session(_session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_create_session(&_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
}
TEST_F(ValidationTestSessionCreated, neg_load_session_1)
TEST_F(ValidationTestSessionCreated, neg_load_session_2)
{
- ASSERT_EQ(nnfw_load_model_from_file(_session, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_load_model_from_file(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
TEST_F(ValidationTestSessionCreated, neg_load_session_3)
nnfw_load_model_from_file(
_session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD_NO_MANIFEST).c_str()),
NNFW_STATUS_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_load_invalid_package_2)
_session,
NNPackages::get().getModelAbsolutePath(NNPackages::ADD_INVALID_MANIFEST).c_str()),
NNFW_STATUS_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_prepare_001)
{
// nnfw_load_model_from_file was not called
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_run_001)
{
// nnfw_load_model_from_file and nnfw_prepare was not called
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_set_input_001)
{
- // Invalid state
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_set_output_001)
{
- // Invalid state
- ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_get_input_size)
{
uint32_t size = 10000;
- ASSERT_EQ(nnfw_input_size(_session, &size), NNFW_STATUS_ERROR);
- ASSERT_EQ(size, 10000);
+ ASSERT_EQ(nnfw_input_size(_session, &size), NNFW_STATUS_INVALID_STATE);
+ ASSERT_EQ(size, 10000); // Remain unchanged
}
TEST_F(ValidationTestSessionCreated, neg_get_output_size)
{
uint32_t size = 10000;
- ASSERT_EQ(nnfw_output_size(_session, &size), NNFW_STATUS_ERROR);
- ASSERT_EQ(size, 10000);
+ ASSERT_EQ(nnfw_output_size(_session, &size), NNFW_STATUS_INVALID_STATE);
+ ASSERT_EQ(size, 10000); // Remain unchanged
}
TEST_F(ValidationTestSessionCreated, neg_output_tensorinfo)
{
nnfw_tensorinfo tensor_info;
// model is not loaded
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &tensor_info), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &tensor_info), NNFW_STATUS_INVALID_STATE);
// model is not loaded and tensor_info is null
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSingleSession, create_001)
{
- ASSERT_EQ(nnfw_create_session(&_session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_close_session(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
}
TEST_F(ValidationTestSingleSession, query_info_u32)
{
uint32_t val = 0;
- ASSERT_EQ(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, &val), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, &val));
}
TEST_F(ValidationTestSingleSession, neg_create_001)
#include "NNPackages.h"
+#define NNFW_ENSURE_SUCCESS(EXPR) ASSERT_EQ((EXPR), NNFW_STATUS_NO_ERROR)
+
inline uint64_t num_elems(const nnfw_tensorinfo *ti)
{
uint64_t n = 1;
return()
endif(NOT INSTALL_TEST_SCRIPTS)
-# Install test scripts
-file(GLOB TEST_SCRIPTS "*.sh")
-install(PROGRAMS ${TEST_SCRIPTS} DESTINATION tests/scripts)
+# Install test driver
+file(GLOB TEST_DRIVER_SCRIPT onert-test)
+install(PROGRAMS ${TEST_DRIVER_SCRIPT} DESTINATION test)
-# Install test list
-file(GLOB TEST_LISTS "list/*.txt")
-install(FILES ${TEST_LISTS} DESTINATION tests/scripts/list)
+# Command scripts are sourced by the test driver (not executed directly), so they don't need execute permission
+install(DIRECTORY command DESTINATION test)
+
+# Install models test script
+file(GLOB MODEL_TEST_SCRIPT "models/run_test.sh")
+install(PROGRAMS ${MODEL_TEST_SCRIPT} DESTINATION test/models)
-# Install framework test script
-file(GLOB FRAMEWORKTEST_SCRIPT "framework/run_test.sh")
-install(PROGRAMS ${FRAMEWORKTEST_SCRIPT} DESTINATION tests/scripts/framework)
+# Install models test list file
+file(GLOB MODEL_TEST_DIR models/config)
+install(DIRECTORY ${MODEL_TEST_DIR} DESTINATION test/models)
-# Install framework test list file
-file(GLOB FRAMEWORKTEST_DIR framework/tests)
-install(DIRECTORY ${FRAMEWORKTEST_DIR} DESTINATION tests/scripts/framework)
+# Install nnpackage test config
+file(GLOB MODEL_TEST_DIR LIST_DIRECTORIES true nnfw_api_gtest/models/*)
+install(DIRECTORY ${MODEL_TEST_DIR} DESTINATION test/models/nnpackage)
+
+# Install test list
+file(GLOB TEST_LIST_DIR list)
+install(DIRECTORY ${TEST_LIST_DIR} DESTINATION test)
source $MY_PATH/common.sh
-BENCHMARK_RUN_TEST_SH=
BENCHMARK_DRIVER_BIN=
BENCHMARK_REPORT_DIR=
BENCHMARK_MODELS_FILE=
function Usage()
{
- echo "Usage: ./$0 --reportdir=. --runtestsh=tests/scripts/framework/run_test.sh --driverbin=Product/out/bin/tflite_run"
+ echo "Usage: ./$0 --reportdir=. --driverbin=Product/out/bin/tflite_run"
}
for i in "$@"
--test_op)
TEST_OP="true"
;;
- --runtestsh=*)
- BENCHMARK_RUN_TEST_SH=${i#*=}
- ;;
--driverbin=*)
BENCHMARK_DRIVER_BIN=${i#*=}
;;
local REPORT_MODEL_DIR=$2
local PAUSE_TIME_IN_SEC=$3
local BENCHMARK_DRIVER_BIN=$4
- local BENCHMARK_RUN_TEST_SH=$5
- local EXECUTORS=$6
- local BACKEND_LIST=$7
+ local EXECUTORS=$5
+ local BACKEND_LIST=$6
export USE_NNAPI=1
done
export BACKENDS=$BACKENDS_TO_USE
if [ "$TEST_OP" == "false" ]; then
- profile_for_he_shed $REPORT_MODEL_DIR $BENCHMARK_RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $PROFILING_RUN_CNT
+ profile_for_he_shed $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN $MODEL $PROFILING_RUN_CNT
fi
for executor in $EXECUTORS; do
export EXECUTOR=$executor
if [ "$TEST_OP" == "false" ]; then
- run_with_he_scheduler $REPORT_MODEL_DIR $BENCHMARK_RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $executor
+ run_with_he_scheduler $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN $MODEL $executor
fi
for backend in $BACKEND_LIST; do
export OP_BACKEND_ALLOPS=$backend
run_benchmark_and_print "tflite_onert_"$executor"_executor_$backend" "TFLite onert $executor Executor $backend"\
- $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH
+ $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN
done
done
unset USE_NNAPI EXECUTOR OP_BACKEND_ALLOPS BACKENDS
# TFLite+CPU
unset USE_NNAPI
- run_benchmark_and_print "tflite_cpu" "TFLite CPU" $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH
+ run_benchmark_and_print "tflite_cpu" "TFLite CPU" $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN
# run onert
if [ "$TEST_OP" == "true" ]; then
# Operation test don't need to test each scheduler
- run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH "Linear" "$BACKEND_LIST"
+ run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN "Linear" "$BACKEND_LIST"
else
- run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH "$EXECUTORS" "$BACKEND_LIST"
+ run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN "$EXECUTORS" "$BACKEND_LIST"
fi
if [[ $i -ne $(echo $BENCHMARK_MODEL_LIST | wc -w)-1 ]]; then
progname=$(basename "${BASH_SOURCE[0]}")
indir="."
outdir="."
-nnpkg_run=${nnpkg_run:-"Product/out/bin/nnpackage_run"}
+nnpkg_run=${nnpkg_run:-"nnpackage_run"}
difftool=${difftool:-"h5diff"}
delete_dumped_on_failure=0
usage() {
- echo "Usage: $progname [options] nnpackage_test"
+ echo "Usage: $0 $progname [options] nnpackage_test"
echo "Run an nnpackage testcase"
echo ""
echo "Returns"
echo " (dumped file are always deleted on success) (default=$delete_dumped_on_failure)"
echo ""
echo "Environment variables:"
- echo " nnpackage_run path to nnpackage_run (default=Product/out/bin/nnpackage_run)"
+ echo " nnpackage_run path to nnpackage_run (default=nnpackage_run)"
echo " difftool path to i5diff or h5diff (default=h5diff)"
echo ""
echo "Examples:"
- echo " $progname Add_000 => run $indir/Add_000 and check output"
- echo " $progname -i nnpkg-tcs Add_000 => run nnpkg-tcs/Add_000 and check output"
+ echo " $0 $progname Add_000 => run $indir/Add_000 and check output"
+ echo " $0 $progname -i nnpkg-tcs Add_000 => run nnpkg-tcs/Add_000 and check output"
exit 1
}
exit 1
fi
-if [ ! -e Product ]; then
- echo "error: please make sure to run this script in nnfw home."
- exit 1
-fi
-
tcname=$(basename "$1")
nnpkg="$indir/$tcname"
if ! command_exists $nnpkg_run; then
echo "error: runner "$nnpkg_run" does not exist."
+ echo " if $nnpkg_run exists, please set PATH to $nnpkg_run"
exit 1
fi
--- /dev/null
+#!/bin/bash
+#
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
+
+MD5_CHECK="on"
+DOWNLOAD_MODEL="all"
+
+function Usage()
+{
+ echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --ignoremd5 Ignore MD5 check when download model files"
+ echo " --model=(all|nnpackage|tflite) Download test model (default=all)"
+}
+
+for i in "$@"
+do
+ case $i in
+ -h|--help|help)
+ Usage
+ exit 1
+ ;;
+ --ignoremd5)
+ MD5_CHECK="off"
+ ;;
+ --model=*)
+ DOWNLOAD_MODEL=${i#*=}
+ ;;
+ *)
+ echo "Unknown option: $i"
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "tflite" ]]; then
+ # Download tflite models
+ $INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK
+fi
+
+if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "nnpackage" ]]; then
+ # Download nnpackage model
+ NNPACKAGE_CONFIG_DIR=$INSTALL_DIR/test/models/nnpackage/
+ NNPACKAGE_CACHE_DIR=$INSTALL_DIR/unittest_standalone/nnfw_api_gtest_models/
+ $INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK \
+ --configdir=$NNPACKAGE_CONFIG_DIR --cachedir=$NNPACKAGE_CACHE_DIR
+fi
# See the License for the specific language governing permissions and
# limitations under the License.
+COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
UNITTEST_REPORT_DIR=
-UNITTEST_TEST_DIR=
+UNITTEST_TEST_DIR=$INSTALL_DIR/unittest
UNITTEST_RESULT=0
UNITTEST_RUN_ALL=""
function Usage()
{
# TODO: Fill this
- echo "Usage: LD_LIBRARY_PATH=Product/out/lib ./$0 --reportdir=report --unittestdir=Product/out/unittest"
+ echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --reportdir=PATH Path to write unittest report"
+ echo " --unittestdir=PATH Path to run unittest (default: $UNITTEST_TEST_DIR"
}
-get_gtest_option()
+function get_gtest_option()
{
local UNITTEST_REPORT_FILE=$(basename $TEST_BIN)
- local output_option="--gtest_output=xml:$UNITTEST_REPORT_DIR/$UNITTEST_REPORT_FILE.xml"
+ local output_option
local filter_option
+ if [ -n "$UNITTEST_REPORT_DIR" ]; then
+ output_option="--gtest_output=xml:$UNITTEST_REPORT_DIR/$UNITTEST_REPORT_FILE.xml"
+ fi
if [ -r "$TEST_BIN.skip" ]; then
filter_option="--gtest_filter=-$(grep -v '#' "$TEST_BIN.skip" | tr '\n' ':')"
fi
--unittestdir=*)
UNITTEST_TEST_DIR=${i#*=}
;;
- --runall)
- UNITTEST_RUN_ALL="true"
+ *)
+ echo "Unknown option: $i"
+ exit 1
+ ;;
esac
shift
done
-# TODO: handle exceptions for params
-
-if [ ! -e "$UNITTEST_REPORT_DIR" ]; then
+if [ -n "$UNITTEST_REPORT_DIR" ] && [ ! -e "$UNITTEST_REPORT_DIR" ]; then
mkdir -p $UNITTEST_REPORT_DIR
fi
echo "============================================"
echo "Starting set $num_unittest: $TEST_BIN..."
echo "============================================"
- TEMP_UNITTEST_RESULT=0
- if [ "$UNITTEST_RUN_ALL" == "true" ]; then
- for TEST_LIST_VERBOSE_LINE in $($TEST_BIN --gtest_list_tests); do
- if [[ $TEST_LIST_VERBOSE_LINE == *\. ]]; then
- TEST_LIST_CATEGORY=$TEST_LIST_VERBOSE_LINE
- else
- TEST_LIST_ITEM="$TEST_LIST_CATEGORY""$TEST_LIST_VERBOSE_LINE"
- $TEST_BIN --gtest_filter=$TEST_LIST_ITEM --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_LIST_ITEM.xml"
- fi
- done
- else
- $TEST_BIN $(get_gtest_option)
- TEMP_UNITTEST_RESULT=$?
- fi
+ $TEST_BIN $(get_gtest_option)
+ TEMP_UNITTEST_RESULT=$?
if [[ $TEMP_UNITTEST_RESULT -ne 0 ]]; then
UNITTEST_RESULT=$TEMP_UNITTEST_RESULT
--- /dev/null
+#!/bin/bash
+#
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
+
+MD5_CHECK="on"
+TFLITE_LOADER="nnapi"
+REPORT_DIR="report"
+TEST_LIST_FILE=
+
+function Usage()
+{
+ echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --ignoremd5 Ignore MD5 check when download model files"
+ echo " --api=(nnapi|loader) TFLite model file loading API (default=$TFLITE_LOADER)"
+ echo " --reportdir=PATH Path to write report (default=$REPORT_DIR)"
+ echo " --list=FILE List file to test. Test all if list option is not passed"
+}
+
+for i in "$@"
+do
+ case $i in
+ -h|--help|help)
+ Usage
+ exit 1
+ ;;
+ --ignoremd5)
+ MD5_CHECK="off"
+ ;;
+ --api=*)
+ TFLITE_LOADER=${i#*=}
+ ;;
+ --reportdir=*)
+ REPORT_DIR=${i#*=}
+ ;;
+ --list=*)
+ TEST_LIST_FILE=${i#*=}
+ ;;
+ *)
+ echo "Unknown option: $i"
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+if [ ! -z "$TEST_LIST_FILE" ]; then
+ MODELLIST=$(cat "${TEST_LIST_FILE}")
+fi
+
+if [ ! -e "$REPORT_DIR" ]; then
+ mkdir -p $REPORT_DIR
+fi
+
+TEST_RESULT=0
+TAP_NAME=verification_test.tap
+TEST_NAME="Verification"
+TEST_DRIVER=
+
+if [[ $TFLITE_LOADER == "nnapi" ]]; then
+ TEST_NAME="NNAPI Verification"
+ TEST_DRIVER=nnapi_test
+elif [[ $TFLITE_LOADER == "loader" ]]; then
+ TEST_NAME="Loader Verification"
+ TEST_DRIVER=tflite_loader_test_tool
+else
+ Usage
+ exit 1
+fi
+
+$INSTALL_DIR/test/models/run_test.sh --driverbin=$TEST_DRIVER \
+ --reportdir=$REPORT_DIR \
+ --tapname=$TAP_NAME \
+ ${MODELLIST:-} > $REPORT_DIR/verification_test.log 2>&1
+TEST_RESULT=$?
+
+if [[ $TEST_RESULT -ne 0 ]]; then
+ echo ""
+ cat $REPORT_DIR/$TAP_NAME
+ echo ""
+ echo "$TEST_NAME failed... exit code: $TEST_RESULT"
+ echo "============================================"
+ echo ""
+ exit $TEST_RESULT
+fi
+
+echo ""
+cat $REPORT_DIR/$TAP_NAME
+echo "============================================"
+echo ""
function get_result_of_benchmark_test()
{
- local RUN_TEST_SH=$1
- local DRIVER_BIN=$2
- local MODEL=$3
- local LOG_FILE=$4
+ local DRIVER_BIN=$1
+ local MODEL=$2
+ local LOG_FILE=$3
local RET=0
- $RUN_TEST_SH --driverbin="$DRIVER_BIN -r 5 -w 3" $MODEL > $LOG_FILE 2>&1
+ $MY_PATH/models/run_test.sh --driverbin="$DRIVER_BIN -r 5 -w 3" $MODEL > $LOG_FILE 2>&1
RET=$?
if [[ $RET -ne 0 ]]; then
echo "Testing $MODEL aborted... exit code: $RET"
LOG_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.txt
RESULT_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.result
print_with_dots $MSG
- RESULT=$(get_result_of_benchmark_test $BENCHMARK_RUN_TEST_SH $DRIVER_BIN $MODEL $LOG_FILE)
+ RESULT=$(get_result_of_benchmark_test $DRIVER_BIN $MODEL $LOG_FILE)
echo "$RESULT ms"
print_result_of_benchmark_test "$MSG" "$RESULT" $RESULT_FILE
sleep $PAUSE_TIME_IN_SEC
pad
reduce_max
reduce_mean
-reduce_sum
+reduce_sum/float
relu
relu6
reshape
pad
reduce_max
reduce_mean
-reduce_sum
+reduce_sum/float
relu
relu6
reshape
pack
pad
reduce_max
-reduce_sum
+reduce_sum/float
relu
relu6
reshape/reshape1
pack
pad
reduce_max
-reduce_sum
+reduce_sum/float
relu
relu6
reshape/reshape1
--- /dev/null
+MODELFILE_NAME="reduce_sum_uint8.tflite"
MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
NNFW_HOME="$(dirname $(dirname $(dirname ${MY_PATH})))"
CACHE_ROOT_PATH=$MY_PATH/"cache"
-TEST_ROOT_PATH=$MY_PATH/"tests"
+TEST_ROOT_PATH=$MY_PATH/"config"
REPORT_DIR="report"
RUN_DISABLED="true"
+function command_exists() {
+ command -v "$@" > /dev/null 2>&1
+}
+
function Usage()
{
echo "Usage: ./$0 --driverbin={such as tflite_run} {tests to test or empty for all of tests}"
echo "Usage: ./$0 --driverbin=Product/out/bin/tflite_run --reportdir=report --tapname=verification.tap avgpool1 avgpool2"
echo ""
- echo "--download - (default=off) Download model files. Other options is ignored"
- echo "--driverbin - (default=../../Product/out/bin/tflite_run) runner for runnning framework tests"
- echo "--reportdir - (default=report) directory to place tap files"
- echo "--tapname - (default=framework_test.tap) file name to be written for tap"
+ echo "--download - (default=on) Download model files"
+ echo "--run - (default=on) Test model files"
+ echo "--driverbin - (default=../../Product/out/bin/tflite_run) Runner for runnning model tests"
+ echo "--reportdir - (default=report) Directory to place tap files"
+ echo "--tapname - (default=framework_test.tap) File name to be written for tap"
+ echo "--md5 - (default=on) MD5 check when download model files"
+ echo "--configdir - (default=$TEST_ROOT_PATH) Config directory to download and test model"
+ echo "--cachedir - (default=$CACHE_ROOT_PATH) Directory to download model"
echo ""
}
return 0;
fi
# Ignore checking md5 in cache
+ # TODO Use "--md5" option only and remove IGNORE_MD5 environment variable
if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
return 1
fi
+ if [ "$MD5_CHECK" = "off" ]; then
+ return 1
+ fi
LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
REMOTE_HASH=$(curl -ss $REMOTE_URL | md5sum | awk '{ print $1 }')
DRIVER_BIN=""
TAP_NAME="framework_test.tap"
TEST_LIST=()
-DOWNLOAD_MODE="off"
+DOWNLOAD_MODEL="on"
+RUN_TEST="on"
+MD5_CHECK="on"
# Support environment variable setting for mirror server
FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
--download=*)
DOWNLOAD_MODE=${i#*=}
;;
+ --md5=*)
+ MD5_CHECK=${i#*=}
+ ;;
+ --run=*)
+ RUN_TEST=${i#*=}
+ ;;
+ --configdir=*)
+ TEST_ROOT_PATH=${i#*=}
+ ;;
+ --cachedir=*)
+ CACHE_ROOT_PATH=${i#*=}
+ ;;
*)
TEST_LIST+=( $i )
;;
DRIVER_BIN="$NNFW_HOME/Product/out/bin/tflite_run"
fi
+if [ ! -d "$TEST_ROOT_PATH" ]; then
+ echo "Cannot find config directory for test: please set proper configdir"
+ exit 1
+fi
+
# Check test driver setting
-if [ ! -e $DRIVER_BIN ] && [ "$DOWNLOAD_MODE" != "on" ]; then
+if ! command_exists $DRIVER_BIN && [ "$RUN_TEST" = "on" ]; then
echo "Cannot find test driver" $DRIVER_BIN ": please set proper DRIVER_BIN"
exit 1
fi
TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
- MODELFILE_URL="$MODELFILE_SERVER_PATH/$MODELFILE_NAME"
- if [ -n "$FIXED_MODELFILE_SERVER" ]; then
- MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
- fi
-
- # Download model file
- if [ ! -e $TEST_CACHE_PATH ]; then
- mkdir -p $TEST_CACHE_PATH
- fi
-
- # Download unless we have it in cache (Also check md5sum)
- if need_download "$MODELFILE" "$MODELFILE_URL"; then
- echo ""
- echo "Download test file for $TEST_NAME"
- echo "======================"
-
- rm -f $MODELFILE # Remove invalid file if exists
- pushd $TEST_CACHE_PATH
- wget -nv $MODELFILE_URL
- if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
- unzip -o $MODELFILE_NAME
- fi
- popd
- fi
# Find model file for downloaded by zip
- if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
+ if [ "${MODELFILE_NAME##*.}" = "zip" ]; then
pushd $TEST_CACHE_PATH
MODELFILE=$TEST_CACHE_PATH/$(ls *.tflite)
popd
# Run driver to test framework
$DRIVER_BIN $MODELFILE
- #$DRIVER_BIN $MODELFILE
if [[ $? -eq 0 ]]; then
echo "ok $i - $TEST_NAME" >> $REPORT_DIR/$TAP_NAME
else
mkdir -p $REPORT_DIR
TESTS_TO_RUN=$(find_tests ${TEST_LIST[@]})
-if [[ "$DOWNLOAD_MODE" == "on" ]]; then
+if [ "$DOWNLOAD_MODEL" = "on" ]; then
download_tests $TESTS_TO_RUN
- exit 0;
fi
-run_tests $TESTS_TO_RUN
+if [ "$RUN_TEST" = "on" ]; then
+ run_tests $TESTS_TO_RUN
+fi
exit $?
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
+
+DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INSTALL_PATH="$(dirname $DRIVER_PATH)"
+COMMAND_PATH=$INSTALL_PATH/test/command
+BIN_PATH=$INSTALL_PATH/bin
+
+export PATH=$BIN_PATH:$PATH
+
+function Usage()
+{
+ echo "Usage: $0 [COMMAND] ..."
+ echo "Command:"
+ for file in $COMMAND_PATH/*;
+ do
+ echo " $(basename "$file")"
+ done
+ exit 255
+}
+
+COMMAND=$1; shift
+if [[ -z $COMMAND ]] || [[ $COMMAND == "--help" ]]; then
+ Usage
+ exit 255
+fi
+
+COMMAND_FILE=$COMMAND_PATH/$COMMAND
+if [[ ! -f $COMMAND_FILE ]]; then
+ echo "ERROR: '$COMMAND' is not supported"
+ exit 255
+fi
+
+source $COMMAND_FILE $@
echo "--artifactpath - (default={test-driver.sh's path}/../../) it should contain tests/ and Product/"
echo ""
echo "Following options are needed when you want to tests of specific types. If you don't pass any one, unittest and verification will be run"
- echo "--unittest - (default=on) run unit test"
echo "--frameworktest - (default=off) run framework test"
echo "--verification - (default=on) run verification"
echo "--frameworktest_list_file - filepath of model list for test"
echo "etc."
echo "--framework_driverbin - (default=../../Product/out/bin/tflite_run) runner for runnning framework tests"
echo "--verification_driverbin - (default=../../Product/out/bin/nnapi_test) runner for runnning verification tests"
- echo "--runtestsh - (default=\$ARTIFACT_PATH/tests/scripts/framework/run_test.sh) run_test.sh with path where it is for framework test and verification"
- echo "--unittestdir - (default=\$ARTIFACT_PATH/Product/out/unittest) directory that has unittest binaries for unit test"
echo ""
echo "--reportdir - (default=\$ARTIFACT_PATH/report) directory to save report"
echo ""
ARTIFACT_PATH="$TEST_DRIVER_DIR/../../"
FRAMEWORK_DRIVER_BIN=""
VERIFICATION_DRIVER_BIN=""
-RUN_TEST_SH=""
-UNIT_TEST_DIR=""
ALLTEST_ON="true"
-UNITTEST_ON="false"
FRAMEWORKTEST_ON="false"
VERIFICATION_ON="false"
BENCHMARK_ONERT_OP_ON="false"
--verification_driverbin=*)
VERIFICATION_DRIVER_BIN=${i#*=}
;;
- --runtestsh=*)
- RUN_TEST_SH=${i#*=}
- ;;
- --unittestdir=*)
- UNIT_TEST_DIR=${i#*=}
- ;;
- --unittest)
- ALLTEST_ON="false"
- UNITTEST_ON="true"
- ;;
--frameworktest)
ALLTEST_ON="false"
FRAMEWORKTEST_ON="true"
ARTIFACT_PATH="$(readlink -f $ARTIFACT_PATH)"
-if [ -z "$RUN_TEST_SH" ]; then
- RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/framework/run_test.sh
-fi
-
-if [ ! -e "$RUN_TEST_SH" ]; then
- echo "Cannot find $RUN_TEST_SH"
- exit 1
-fi
-
if [ -z "$UNIT_TEST_DIR" ]; then
UNIT_TEST_DIR=$ARTIFACT_PATH/Product/out/unittest
fi
source $TEST_DRIVER_DIR/common.sh
-# Run unittest in each part such as Runtime
-if [ "$ALLTEST_ON" == "true" ] || [ "$UNITTEST_ON" == "true" ]; then
- $TEST_DRIVER_DIR/unittest.sh \
- --reportdir=$REPORT_DIR \
- --unittestdir=$UNIT_TEST_DIR
-fi
-
# Run tflite_run with various tflite models
if [ "$FRAMEWORKTEST_ON" == "true" ]; then
if [ -z "$FRAMEWORK_DRIVER_BIN" ]; then
fi
$TEST_DRIVER_DIR/test_framework.sh \
- --runtestsh=$RUN_TEST_SH \
--driverbin=$FRAMEWORK_DRIVER_BIN \
--reportdir=$REPORT_DIR \
--tapname=framework_test.tap \
# verification uses the same script as frameworktest does
$TEST_DRIVER_DIR/test_framework.sh \
- --runtestsh=$RUN_TEST_SH \
--driverbin=$VERIFICATION_DRIVER_BIN \
--reportdir=$REPORT_DIR \
--tapname=verification_test.tap \
$TEST_DRIVER_DIR/benchmark_nnapi.sh \
--test_op \
- --runtestsh=$RUN_TEST_SH \
--driverbin=$DRIVER_BIN \
--reportdir=$REPORT_DIR/benchmark_op \
- --modelfilepath=$ARTIFACT_PATH/tests/scripts/framework
+ --modelfilepath=$ARTIFACT_PATH/tests/scripts/models
fi
# Make json file. Actually, this process is only needed on CI. That's why it is in test-driver.sh.
# See the License for the specific language governing permissions and
# limitations under the License.
-FWTEST_RUN_TEST_SH=
+MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
FWTEST_DRIVER_BIN=
FWTEST_REPORT_DIR=
FWTEST_TAP_NAME=
{
echo "Usage Example:"
echo "./$0 \\"
- echo " --runtestsh=tests/scripts/framework/run_test.sh \\ # Test runner script path"
echo " --driverbin=Product/out/bin/tflite_run \\ # Test driver path"
echo " --frameworktest_list_file=tests/scripts/list/frameworktest_list.armv7l.cpu.txt \\"
echo " --reportdir=report \\ # Directory for the report files will be saved"
-h|--help|help)
Usage
;;
- --runtestsh=*)
- FWTEST_RUN_TEST_SH=${i#*=}
- ;;
--driverbin=*)
FWTEST_DRIVER_BIN=${i#*=}
;;
shift
done
-[ ! -z "$FWTEST_RUN_TEST_SH" ] || Usage
[ ! -z "$FWTEST_DRIVER_BIN" ] || Usage
[ ! -z "$FWTEST_REPORT_DIR" ] || Usage
[ ! -z "$FWTEST_TAP_NAME" ] || Usage
MODELLIST=$(cat "${FRAMEWORKTEST_LIST_FILE}")
fi
-$FWTEST_RUN_TEST_SH --driverbin=$FWTEST_DRIVER_BIN \
+$MY_PATH/models/run_test.sh --driverbin=$FWTEST_DRIVER_BIN \
--reportdir=$FWTEST_REPORT_DIR \
--tapname=$FWTEST_TAP_NAME \
${MODELLIST:-} \
ARTIFACT_PATH="$TEST_DRIVER_DIR/../.."
BENCHMARK_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
REPORT_DIR=$ARTIFACT_PATH/report
-RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/framework/run_test.sh
+RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/models/run_test.sh
BENCHMARK_MODEL_LIST="MODELS/inception_nonslim MODELS/inception_slim MODELS/mobilenet"
if [ ! -e "$RUN_TEST_SH" ]; then
target_include_directories(nnpackage_run PRIVATE ${Boost_INCLUDE_DIRS})
target_link_libraries(nnpackage_run onert_core onert tflite_loader)
-target_link_libraries(nnpackage_run tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite jsoncpp)
+target_link_libraries(nnpackage_run nnfw_lib_tflite jsoncpp)
target_link_libraries(nnpackage_run nnfw-dev)
target_link_libraries(nnpackage_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
target_link_libraries(nnpackage_run nnfw_lib_benchmark)
#include "args.h"
+#include <functional>
#include <iostream>
#include <json/json.h>
void Args::Initialize(void)
{
+ auto process_nnpackage = [&](const std::string &package_filename) {
+ _package_filename = package_filename;
+
+ std::cerr << "Package Filename " << _package_filename << std::endl;
+ if (_package_filename.empty())
+ {
+ // TODO Print usage instead of the below message
+ std::cerr << "Please specify nnpackage file. Run with `--help` for usage."
+ << "\n";
+
+ exit(1);
+ }
+ else
+ {
+ if (access(_package_filename.c_str(), F_OK) == -1)
+ {
+ std::cerr << "nnpackage not found: " << _package_filename << "\n";
+ }
+ }
+ };
+
+ auto process_output_sizes = [&](const std::string &output_sizes_json_str) {
+ Json::Value root;
+ Json::Reader reader;
+ if (!reader.parse(output_sizes_json_str, root, false))
+ {
+ std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n";
+ exit(1);
+ }
+
+ auto arg_map = argArrayToMap(root);
+ for (auto &pair : arg_map)
+ {
+ uint32_t key = pair.first;
+ Json::Value &val_json = pair.second;
+ if (!val_json.isUInt())
+ {
+ std::cerr << "All the values in `output_sizes` must be unsigned integers\n";
+ exit(1);
+ }
+ uint32_t val = val_json.asUInt();
+ _output_sizes[key] = val;
+ }
+ };
+
+ auto process_shape_prepare = [&](const std::string &shape_str) {
+ try
+ {
+ handleShapeParam(_shape_prepare, shape_str);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "error with '--shape_prepare' option: " << shape_str << std::endl;
+ exit(1);
+ }
+ };
+
+ auto process_shape_run = [&](const std::string &shape_str) {
+ try
+ {
+ handleShapeParam(_shape_run, shape_str);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "error with '--shape_run' option: " << shape_str << std::endl;
+ exit(1);
+ }
+ };
+
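+ // Each option below registers a notifier callback; po::notify(vm), called after
+ // parsing succeeds, runs these callbacks to fill in the corresponding members.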
// General options
po::options_description general("General options", 100);
general.add_options()
("help,h", "Print available options")
("version", "Print version and exit immediately")
- ("nnpackage", po::value<std::string>()->required())
+ ("nnpackage", po::value<std::string>()->required()->notifier(process_nnpackage))
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- ("dump,d", po::value<std::string>()->default_value(""), "Output filename")
- ("load,l", po::value<std::string>()->default_value(""), "Input filename")
+ ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
+ ("load,l", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_filename = v; }), "Input filename")
#endif
- ("output_sizes", po::value<std::string>(),
+ ("output_sizes", po::value<std::string>()->notifier(process_output_sizes),
"The output buffer size in JSON 1D array\n"
"If not given, the model's output sizes are used\n"
"e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n")
- ("num_runs,r", po::value<int>()->default_value(1), "The number of runs")
- ("warmup_runs,w", po::value<int>()->default_value(0), "The number of warmup runs")
- ("run_delay,t", po::value<int>()->default_value(-1), "Delay time(ms) between runs (as default no delay")
- ("gpumem_poll,g", po::value<bool>()->default_value(false), "Check gpu memory polling separately")
- ("mem_poll,m", po::value<bool>()->default_value(false), "Check memory polling")
- ("write_report,p", po::value<bool>()->default_value(false),
+ ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
+ ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
+ ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(ms) between runs (as default no delay")
+ ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
+ ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling")
+ ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }),
"Write report\n"
"{exec}-{nnpkg}-{backend}.csv will be generated.\n"
"e.g. nnpackage_run-UNIT_Add_000-acl_cl.csv.\n"
"{nnpkg} name may be changed to realpath if you use symbolic-link.")
- ("shape_prepare", po::value<std::string>()->default_value("[]"),
+ ("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
"set shape of specified tensor before compilation\n"
"e.g. '[0, [1, 2], 2, []]' to set 0th tensor to [1, 2] and 2nd tensor to [].\n")
- ("shape_run", po::value<std::string>()->default_value("[]"),
+ ("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
"set shape of specified tensor right before running\n"
"e.g. '[1, [1, 2]]` to set 1st tensor to [1, 2].\n")
- ("verbose_level,v", po::value<int>()->default_value(0), "Verbose level\n"
+ ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
+ "Verbose level\n"
"0: prints the only result. Messages btw run don't print\n"
"1: prints result and message btw run\n"
"2: prints all of messages to print\n")
return;
}
- po::notify(vm);
try
{
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- if (vm.count("dump"))
- {
- _dump_filename = vm["dump"].as<std::string>();
- }
-
- if (vm.count("load"))
- {
- _load_filename = vm["load"].as<std::string>();
- }
-#endif
-
- if (vm.count("nnpackage"))
- {
- _package_filename = vm["nnpackage"].as<std::string>();
-
- if (_package_filename.empty())
- {
- // TODO Print usage instead of the below message
- std::cerr << "Please specify nnpackage file. Run with `--help` for usage."
- << "\n";
-
- exit(1);
- }
- else
- {
- if (access(_package_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "nnpackage not found: " << _package_filename << "\n";
- }
- }
- }
-
- if (vm.count("output_sizes"))
- {
- auto output_sizes_json_str = vm["output_sizes"].as<std::string>();
-
- Json::Value root;
- Json::Reader reader;
- if (!reader.parse(output_sizes_json_str, root, false))
- {
- std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n";
- exit(1);
- }
-
- auto arg_map = argArrayToMap(root);
- for (auto &pair : arg_map)
- {
- uint32_t key = pair.first;
- Json::Value &val_json = pair.second;
- if (!val_json.isUInt())
- {
- std::cerr << "All the values in `output_sizes` must be unsigned integers\n";
- exit(1);
- }
- uint32_t val = val_json.asUInt();
- _output_sizes[key] = val;
- }
- }
-
- if (vm.count("num_runs"))
- {
- _num_runs = vm["num_runs"].as<int>();
- }
-
- if (vm.count("warmup_runs"))
- {
- _warmup_runs = vm["warmup_runs"].as<int>();
- }
-
- if (vm.count("run_delay"))
- {
- _run_delay = vm["run_delay"].as<int>();
- }
-
- if (vm.count("gpumem_poll"))
- {
- _gpumem_poll = vm["gpumem_poll"].as<bool>();
- }
-
- if (vm.count("mem_poll"))
- {
- _mem_poll = vm["mem_poll"].as<bool>();
- // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP
- if (_mem_poll && _warmup_runs == 0)
- {
- _warmup_runs = 1;
- }
- }
-
- if (vm.count("write_report"))
- {
- _write_report = vm["write_report"].as<bool>();
- }
-
- if (vm.count("verbose_level"))
- {
- _verbose_level = vm["verbose_level"].as<int>();
- }
+ po::notify(vm);
}
catch (const std::bad_cast &e)
{
- std::cerr << "error by bad cast" << e.what() << '\n';
+ std::cerr << "Bad cast error - " << e.what() << '\n';
exit(1);
}
- if (vm.count("shape_prepare"))
- {
- std::string shape_str;
- try
- {
- shape_str = vm["shape_prepare"].as<std::string>();
- }
- catch (const std::bad_cast &e)
- {
- std::cerr << "error by bad cast with '--shape_prepare' option" << e.what() << '\n';
- exit(1);
- }
- try
- {
- handleShapeParam(_shape_prepare, shape_str);
- }
- catch (const std::exception &e)
- {
- std::cerr << "error with '--shape_prepare' option: " << shape_str << std::endl;
- exit(1);
- }
- }
-
- if (vm.count("shape_run"))
+ // This must be run after `notify` as `_warmup_runs` must have been processed before.
+ if (vm.count("mem_poll"))
{
- std::string shape_str;
- try
- {
- shape_str = vm["shape_run"].as<std::string>();
- }
- catch (const std::bad_cast &e)
+ // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP
+ if (_mem_poll && _warmup_runs == 0)
{
- std::cerr << "error by bad cast with '--shape_run' option" << e.what() << '\n';
- exit(1);
- }
- try
- {
- handleShapeParam(_shape_run, shape_str);
- }
- catch (const std::exception &e)
- {
- std::cerr << "error with '--shape_run' option: " << shape_str << std::endl;
- exit(1);
+ _warmup_runs = 1;
}
}
}
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT64);
break;
}
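+ // UINT8 tensors reuse the QUANT8_ASYMM dump path below; both are written as 8-bit data.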
+ case NNFW_TYPE_TENSOR_UINT8:
case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
{
H5::DataSet data_set =
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
break;
}
- case NNFW_TYPE_TENSOR_UINT8:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_UINT8);
- break;
- }
default:
throw std::runtime_error("nnpkg_run can dump f32, i32, qasymm8, bool and uint8.");
}
#endif
#include "nnfw.h"
#include "nnfw_util.h"
-#include "nnfw_debug.h"
+#include "nnfw_internal.h"
#include "randomgen.h"
#ifdef RUY_PROFILER
#include "ruy/profiler/profiler.h"
target_include_directories(tflite_loader_test_tool PRIVATE ${Boost_INCLUDE_DIRS})
target_link_libraries(tflite_loader_test_tool onert_core onert tflite_loader)
-target_link_libraries(tflite_loader_test_tool nnfw_lib_tflite tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_misc)
+target_link_libraries(tflite_loader_test_tool nnfw_lib_tflite nnfw_lib_misc)
target_link_libraries(tflite_loader_test_tool ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
install(TARGETS tflite_loader_test_tool DESTINATION bin)
target_include_directories(tflite_run PRIVATE src)
target_include_directories(tflite_run PRIVATE ${Boost_INCLUDE_DIRS})
-target_link_libraries(tflite_run tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
+target_link_libraries(tflite_run nnfw_lib_tflite)
target_link_libraries(tflite_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
target_link_libraries(tflite_run nnfw_lib_benchmark)
void Args::Initialize(void)
{
+ auto process_input = [&](const std::string &v) {
+ _input_filename = v;
+
+ if (!_input_filename.empty())
+ {
+ if (access(_input_filename.c_str(), F_OK) == -1)
+ {
+ std::cerr << "input image file not found: " << _input_filename << "\n";
+ }
+ }
+ };
+
+ auto process_tflite = [&](const std::string &v) {
+ _tflite_filename = v;
+
+ if (_tflite_filename.empty())
+ {
+ // TODO Print usage instead of the below message
+ std::cerr << "Please specify tflite file. Run with `--help` for usage."
+ << "\n";
+
+ exit(1);
+ }
+ else
+ {
+ if (access(_tflite_filename.c_str(), F_OK) == -1)
+ {
+ std::cerr << "tflite file not found: " << _tflite_filename << "\n";
+ exit(1);
+ }
+ }
+ };
+
try
{
// General options
// clang-format off
general.add_options()
("help,h", "Display available options")
- ("input,i", po::value<std::string>()->default_value(""), "Input filename")
- ("dump,d", po::value<std::string>()->default_value(""), "Output filename")
- ("ishapes", po::value<std::vector<int>>()->multitoken(), "Input shapes")
- ("compare,c", po::value<std::string>()->default_value(""), "filename to be compared with")
- ("tflite", po::value<std::string>()->required())
- ("num_runs,r", po::value<int>()->default_value(1), "The number of runs")
- ("warmup_runs,w", po::value<int>()->default_value(0), "The number of warmup runs")
- ("run_delay,t", po::value<int>()->default_value(-1), "Delay time(ms) between runs (as default no delay")
- ("gpumem_poll,g", po::value<bool>()->default_value(false), "Check gpu memory polling separately")
+ ("input,i", po::value<std::string>()->default_value("")->notifier(process_input), "Input filename")
+ ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
+ ("ishapes", po::value<std::vector<int>>()->multitoken()->notifier([&](const auto &v) { _input_shapes = v; }), "Input shapes")
+ ("compare,c", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _compare_filename = v; }), "filename to be compared with")
+ ("tflite", po::value<std::string>()->required()->notifier(process_tflite))
+ ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
+ ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
+ ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(ms) between runs (as default no delay)")
+ ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
("mem_poll,m", po::value<bool>()->default_value(false), "Check memory polling")
- ("write_report,p", po::value<bool>()->default_value(false), "Write report")
- ("validate", po::value<bool>()->default_value(true), "Validate tflite model")
- ("verbose_level,v", po::value<int>()->default_value(0), "Verbose level\n"
+ ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }), "Write report")
+ ("validate", po::value<bool>()->default_value(true)->notifier([&](const auto &v) { _tflite_validate = v; }), "Validate tflite model")
+ ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }), "Verbose level\n"
"0: prints the only result. Messages btw run don't print\n"
"1: prints result and message btw run\n"
"2: prints all of messages to print\n")
po::notify(vm);
- if (vm.count("dump"))
- {
- _dump_filename = vm["dump"].as<std::string>();
- }
-
- if (vm.count("compare"))
- {
- _compare_filename = vm["compare"].as<std::string>();
- }
-
- if (vm.count("input"))
- {
- _input_filename = vm["input"].as<std::string>();
-
- if (!_input_filename.empty())
- {
- if (access(_input_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "input image file not found: " << _input_filename << "\n";
- }
- }
- }
-
- if (vm.count("ishapes"))
- {
- _input_shapes.resize(vm["ishapes"].as<std::vector<int>>().size());
- for (auto i = 0; i < _input_shapes.size(); i++)
- {
- _input_shapes[i] = vm["ishapes"].as<std::vector<int>>()[i];
- }
- }
-
- if (vm.count("tflite"))
- {
- _tflite_filename = vm["tflite"].as<std::string>();
-
- if (_tflite_filename.empty())
- {
- // TODO Print usage instead of the below message
- std::cerr << "Please specify tflite file. Run with `--help` for usage."
- << "\n";
-
- exit(1);
- }
- else
- {
- if (access(_tflite_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "tflite file not found: " << _tflite_filename << "\n";
- exit(1);
- }
- }
- }
-
- if (vm.count("num_runs"))
- {
- _num_runs = vm["num_runs"].as<int>();
- }
-
- if (vm.count("warmup_runs"))
- {
- _warmup_runs = vm["warmup_runs"].as<int>();
- }
-
- if (vm.count("run_delay"))
- {
- _run_delay = vm["run_delay"].as<int>();
- }
-
- if (vm.count("gpumem_poll"))
- {
- _gpumem_poll = vm["gpumem_poll"].as<bool>();
- }
-
+ // This must be run after `notify` as `_warmup_runs` must have been processed before.
if (vm.count("mem_poll"))
{
_mem_poll = vm["mem_poll"].as<bool>();
_warmup_runs = 1;
}
}
-
- if (vm.count("write_report"))
- {
- _write_report = vm["write_report"].as<bool>();
- }
-
- if (vm.count("validate"))
- {
- _tflite_validate = vm["validate"].as<bool>();
- }
-
- if (vm.count("verbose_level"))
- {
- _verbose_level = vm["verbose_level"].as<int>();
- }
}
} // end of namespace TFLiteRun
+++ /dev/null
-if(NOT BUILD_TFLITE_RUN_2_2_0)
- return()
-endif()
-
-if(NOT BUILD_TENSORFLOW_LITE_2_2_0)
- set(BUILD_TENSORFLOW_LITE_2_2_0 ON)
-endif()
-
-nnfw_find_package(TensorFlowLite-2.2.0 REQUIRED)
-nnfw_find_package(Boost REQUIRED)
-
-list(APPEND TFLITE_RUN_SRCS "src/tflite_run_2_2_0.cc")
-list(APPEND TFLITE_RUN_SRCS "src/args.cc")
-
-add_executable(tflite_run_2_2_0 ${TFLITE_RUN_SRCS})
-target_include_directories(tflite_run_2_2_0 PRIVATE src)
-target_include_directories(tflite_run_2_2_0 PRIVATE ${Boost_INCLUDE_DIRS})
-
-target_link_libraries(tflite_run_2_2_0 tensorflow-lite-2.2.0 ${LIB_PTHREAD} dl)
-target_link_libraries(tflite_run_2_2_0 ${Boost_PROGRAM_OPTIONS_LIBRARY})
-target_link_libraries(tflite_run_2_2_0 nnfw_lib_benchmark nnfw_lib_misc)
-
-install(TARGETS tflite_run_2_2_0 DESTINATION bin)
--- /dev/null
+if(NOT BUILD_TFLITE_VANILLA_RUN)
+ return()
+endif()
+
+if(NOT BUILD_TENSORFLOW_LITE_2_3_0)
+ set(BUILD_TENSORFLOW_LITE_2_3_0 ON)
+endif()
+
+nnfw_find_package(TensorFlowLite-2.3.0 REQUIRED)
+nnfw_find_package(Boost REQUIRED)
+
+list(APPEND TFLITE_RUN_SRCS "src/tflite_vanilla_run.cc")
+list(APPEND TFLITE_RUN_SRCS "src/args.cc")
+
+add_executable(tflite_vanilla_run ${TFLITE_RUN_SRCS})
+target_include_directories(tflite_vanilla_run PRIVATE src)
+target_include_directories(tflite_vanilla_run PRIVATE ${Boost_INCLUDE_DIRS})
+
+target_link_libraries(tflite_vanilla_run tensorflow-lite-2.3.0 ${LIB_PTHREAD} dl)
+target_link_libraries(tflite_vanilla_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
+target_link_libraries(tflite_vanilla_run nnfw_lib_benchmark nnfw_lib_misc)
+
+install(TARGETS tflite_vanilla_run DESTINATION bin)
#include <iostream>
-namespace TFLiteRun220
+namespace TFLiteVanillaRun
{
Args::Args(const int argc, char **argv) noexcept
}
}
-} // end of namespace TFLiteRun220
+} // end of namespace TFLiteVanillaRun
* limitations under the License.
*/
-#ifndef __TFLITE_RUN_2_2_0_ARGS_H__
-#define __TFLITE_RUN_2_2_0_ARGS_H__
+#ifndef __TFLITE_VANILLA_RUN_ARGS_H__
+#define __TFLITE_VANILLA_RUN_ARGS_H__
#include <string>
#include <boost/program_options.hpp>
namespace po = boost::program_options;
-namespace TFLiteRun220
+namespace TFLiteVanillaRun
{
class Args
int _verbose_level;
};
-} // end of namespace TFLiteRun220
+} // end of namespace TFLiteVanillaRun
-#endif // __TFLITE_RUN_2_2_0_ARGS_H__
+#endif // __TFLITE_VANILLA_RUN_ARGS_H__
* @ingroup COM_AI_RUNTIME
*/
-#ifndef __TFLITE_RUN_2_2_0_TENSOR_VIEW_H__
-#define __TFLITE_RUN_2_2_0_TENSOR_VIEW_H__
+#ifndef __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
+#define __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
#include "tensorflow/lite/interpreter.h"
#include "misc/tensor/Reader.h"
#include "misc/tensor/NonIncreasingStride.h"
-namespace TFLiteRun220
+namespace TFLiteVanillaRun
{
/**
}
};
-} // namespace TFLiteRun220
+} // namespace TFLiteVanillaRun
-#endif // __TFLITE_RUN_2_2_0_TENSOR_VIEW_H__
+#endif // __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
{
tflite::StderrReporter error_reporter;
- TFLiteRun220::Args args(argc, argv);
+ TFLiteVanillaRun::Args args(argc, argv);
std::chrono::milliseconds t_model_load(0), t_prepare(0);
if (tensor->type == kTfLiteInt32)
{
// Generate singed 32-bit integer (s32) input
- auto tensor_view = TFLiteRun220::TensorView<int32_t>::make(*interpreter, o);
+ auto tensor_view = TFLiteVanillaRun::TensorView<int32_t>::make(*interpreter, o);
int32_t value = 0;
else if (tensor->type == kTfLiteUInt8)
{
// Generate unsigned 8-bit integer input
- auto tensor_view = TFLiteRun220::TensorView<uint8_t>::make(*interpreter, o);
+ auto tensor_view = TFLiteVanillaRun::TensorView<uint8_t>::make(*interpreter, o);
uint8_t value = 0;
else if (tensor->type == kTfLiteBool)
{
// Generate bool input
- auto tensor_view = TFLiteRun220::TensorView<bool>::make(*interpreter, o);
+ auto tensor_view = TFLiteVanillaRun::TensorView<bool>::make(*interpreter, o);
auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
"
model_type=""
+tf_intf_version=""
for ext in $supported_model_types; do
[ -e "$indir/$tcname"."$ext" ] && model_type=$ext
fi
if [[ "$model_type" == "pb" ]]; then
- $tf2nnpkg --info "$indir/$tcname".info --graphdef "$indir/$tcname"."$model_type" -o "$outdir"
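+ # Pass --v2 to tf2nnpkg when a "<tcname>.v2" marker file exists next to the model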
+ [ -f "$indir/$tcname"."v2" ] && tf_intf_version="--v2"
+ $tf2nnpkg --info "$indir/$tcname".info --graphdef "$indir/$tcname"."$model_type" \
+ "$tf_intf_version" -o "$outdir"
else
$model2nnpkg -o "$outdir" "$indir/$tcname"."$model_type"
fi
# @ target
$ OP_BACKEND_ALLOPS=cpu \
-tests/scripts/nnpkg_test.sh test_model.conv2d_transpose
+onert/test/onert-test nnpkg-test test_model.conv2d_transpose
[ Run ] ./test_model.out Pass
[Compare] ./test_model.out Pass
```
# @ target
# run nnpkg with nnpackage_run and compare with h5diff
-$ tests/scripts/nnpkg_test.sh -i nnpkg-tcs cast
+$ onert/test/onert-test nnpkg-test -i nnpkg-tcs cast
```
-#!/usr/bin/python
+#!/usr/bin/env python
# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
#
return tflite.Model.ModelEnd(new_builder)
-def Finish(new_builder, new_model):
- # Cusrom implementation: identifier
- # Python API don't support identifier input yet
- # Reference: Finish(self, rootTable)) in builder.py, Finish(uoffset_t root, const char *file_identifier, bool size_prefix) in flatbuffers.h
- new_builder.Prep(new_builder.minalign,
- flatbuffers.number_types.UOffsetTFlags.bytewidth)
-
- new_builder.PrependByte(0x33)
- new_builder.PrependByte(0x4c)
- new_builder.PrependByte(0x46)
- new_builder.PrependByte(0x54)
-
- new_builder.PrependUOffsetTRelative(new_model)
- new_builder.finished = True
- return new_builder.Head()
-
-
def main(args):
input_model_file = args.input_model
oplist_file = args.opcode_list
new_input_tensors, new_output_tensors, used_tensors_dic,
used_buffers_dic, used_opcodes_dic, used_subgraphs_dic)
- Finish(new_builder, new_model)
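+ # file_identifier writes the 4-byte "TFL3" magic after the root offset,
+ # matching what the removed custom Finish() helper prepended by hand.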
+ new_builder.Finish(new_model, file_identifier=b'TFL3')
new_buf = new_builder.Output()
output_model_file.write(new_buf)
-# tflkit
+# tflkit
## Purpose
### TensorFlow
-TensorFlow provides some kinds of converting guideline. In Python, the [TFLiteConverter](https://www.tensorflow.org/api_docs/python/tf/contrib/lite/TFLiteConverter) class will help you to convert a TensorFlow GraphDef or SavedModel into `output_format` using TOCO. The `output_format` can be `TFLITE` or `GRAPHVIZ_DOT` format. The default `output_format` is `TFLITE`. And there is a Python command line interface for running TOCO, and its name is [tflite_convert](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/python/tflite_convert.py). This converts a TensorFlow GraphDef or SavedModel into `TFLITE` or `GRAPHVIZ_DOT` format like [TFLiteConverter](https://www.tensorflow.org/api_docs/python/tf/contrib/lite/TFLiteConverter). These two way also supports to convert a TensorFlow Keras model into `output_format`. Both functions are implemented using a tool called TOCO.
+TensorFlow provides several ways to convert a model. In Python, the [TFLiteConverter](https://www.tensorflow.org/api_docs/python/tf/lite/TFLiteConverter) class helps you convert a TensorFlow GraphDef or SavedModel into `output_format` using TOCO. The `output_format` can be `TFLITE` or `GRAPHVIZ_DOT`; the default is `TFLITE`. There is also a Python command line interface for running TOCO, [tflite_convert](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/python/tflite_convert.py), which converts a TensorFlow GraphDef or SavedModel into the `TFLITE` or `GRAPHVIZ_DOT` format like [TFLiteConverter](https://www.tensorflow.org/api_docs/python/tf/lite/TFLiteConverter). Both ways also support converting a TensorFlow Keras model into `output_format`, and both are implemented using a tool called TOCO.
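For orientation only (this is not part of tflkit), a direct `tflite_convert` invocation with placeholder file and array names might look like this:

```sh
# Hypothetical example; replace the paths and array names with your model's values.
tflite_convert \
  --graph_def_file=frozen_graph.pb \
  --output_file=model.tflite \
  --input_arrays=input \
  --output_arrays=output
```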
### with tflkit
-The tflkit uses the [tflite_convert](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/python/tflite_convert.py) python command line interface to convert a TensorFlow model into TfLite model. It only supports to convert a TensorFlow GraphDef file into `TFLITE` format file. This tool supports the creation of individual `TFLITE` files for different input shapes. When converting to multiple `TFLITE` files, it needs to put a string called `NAME` in `TFLITE_PATH`. The string `NAME` will be replaced by what is listed in teh `NAME` environment. This tool requires an information file as a parameter. There is an [example file](info/convert.template) for a convert information. The `--tensorflow_path` and `--tensorflow_version` can change the TensorFlow location. By default, it uses `externals/tensorflow` directory.
+The tflkit uses the [tflite_convert](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/python/tflite_convert.py) Python command line interface to convert a TensorFlow model into a TfLite model. It only supports converting a TensorFlow GraphDef file into a `TFLITE` format file. The tool can also create individual `TFLITE` files for different input shapes; when converting to multiple `TFLITE` files, put the string `NAME` in `TFLITE_PATH`, and it will be replaced by what is listed in the `NAME` environment. This tool requires an information file as a parameter. There is an [example file](convert.template) for the convert information. The `--tensorflow_path` and `--tensorflow_version` options can change the TensorFlow location. By default, it uses the `externals/tensorflow` directory.
Convert information:
* GRAPHDEF_PATH : Full filepath of file containing frozen TensorFlow GraphDef.
### with tflkit
-The [optimize_for_inference.sh](optimize_for_inference.sh) file invokes the TensorFlow [optimize tool](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/optimize_for_inference.py). This tool requires a optimize information file as a parameter. Here is an [example file](info/optimize.template) for this tool. The information file needs `INPUT` and `OUTPUT` array names. The [summarize_pb.sh](summarize_pb.sh) file will help you to define the `INPUT` and `OUTPUT` array names. The `--tensorflow_path` can change the TensorFlow location. By default, it uses `externals/tensorflow` directory.
+The [optimize_for_inference.sh](optimize_for_inference.sh) file invokes the TensorFlow [optimize tool](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/optimize_for_inference.py). This tool requires an optimize information file as a parameter. Here is an [example file](optimize.template) for this tool. The information file needs the `INPUT` and `OUTPUT` array names. The [summarize_pb.sh](summarize_pb.sh) file will help you identify the `INPUT` and `OUTPUT` array names. The `--tensorflow_path` option can change the TensorFlow location. By default, it uses the `externals/tensorflow` directory.
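For reference, the underlying TensorFlow tool can also be run directly; the following is only a sketch with placeholder paths and array names:

```sh
# Hypothetical example; in practice the INPUT/OUTPUT names come from summarize_pb.sh.
python optimize_for_inference.py \
  --input=frozen_graph.pb \
  --output=optimized_graph.pb \
  --input_names=input \
  --output_names=output
```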
Optimize information:
* GRAPHDEF_PATH : Full filepath of file containing frozen TensorFlow GraphDef.
### with tflkit
-The [transform_graph.sh](transform_graph.sh) file supports to transform a TensorFlow GraphDef using various transform options. This tool requires a transform information file as a parameter and the transform options are described in the information file. There is an [example file](info/transform.template) for this tool. The information file needs `INPUT` and `OUTPUT` array names. The [summarize_pb.sh](summarize_pb.sh) file will help you to define the `INPUT` and `OUTPUT` array names. The `--tensorflow_path` can change the TensorFlow location. By default, it uses `externals/tensorflow` directory.
+The [transform_graph.sh](transform_graph.sh) file transforms a TensorFlow GraphDef using various transform options. This tool requires a transform information file as a parameter; the transform options are described in the information file. There is an [example file](transform.template) for this tool. The information file needs the `INPUT` and `OUTPUT` array names. The [summarize_pb.sh](summarize_pb.sh) file will help you identify the `INPUT` and `OUTPUT` array names. The `--tensorflow_path` option can change the TensorFlow location. By default, it uses the `externals/tensorflow` directory.
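As an illustration of what the wrapper drives underneath (the graph paths, array names, and transform list below are placeholders, not tflkit defaults):

```sh
# Hypothetical example; the actual transforms are taken from the information file.
bazel-bin/tensorflow/tools/graph_transforms/transform_graph \
  --in_graph=frozen_graph.pb \
  --out_graph=transformed_graph.pb \
  --inputs=input \
  --outputs=output \
  --transforms='fold_constants sort_by_execution_order'
```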
Transform information:
* GRAPHDEF_PATH : Full filepath of file containing frozen TensorFlow GraphDef.
### with tflkit
-The tflkit provides the simple way to create a frozen graph using [freeze_graph](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py) tool. This tool requires an information file as a parameter. There is an [example file](info/freeze.info) for a freeze tool. Either `SAVED_MODEL` or `META_GRAPH` must be declared. And `META_GRAPH` is always used with `CKPT_PATH`. The `--tensorflow_path` can change the TensorFlow location. By default, it uses `externals/tensorflow` directory.
+The tflkit provides a simple way to create a frozen graph using the [freeze_graph](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py) tool. This tool requires an information file as a parameter. There is an [example file](freeze.template) for the freeze tool. Either `SAVED_MODEL` or `META_GRAPH` must be declared, and `META_GRAPH` is always used together with `CKPT_PATH`. The `--tensorflow_path` option can change the TensorFlow location. By default, it uses the `externals/tensorflow` directory.
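For illustration, the underlying `freeze_graph` tool takes roughly this form when used with a meta graph and checkpoint (file and node names are placeholders):

```sh
# Hypothetical example; with SAVED_MODEL you would pass --input_saved_model_dir instead.
python freeze_graph.py \
  --input_meta_graph=model.meta \
  --input_checkpoint=model.ckpt \
  --input_binary=true \
  --output_node_names=output \
  --output_graph=frozen_graph.pb
```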
Freeze information:
* SAVED_MODEL : Full directory path with TensorFlow `SavedModel` file and variables.
version=$1
-sed -i "s/^release = .*/release = \'$version\'/" ${nnfw_root}/docs/conf.py
-sed -i "s/^Version: .*/Version: $version/" ${nnfw_root}/packaging/nnfw.spec
+perl -pi -e "s/^release = .*/release = \'$version\'/" ${nnfw_root}/docs/conf.py
-IFS=. read M m p <<< $version
+perl -pi -e "s/^Version: .*/Version: $version/" ${nnfw_root}/packaging/nnfw.spec
+
+IFS=. read M m p <<< "$version"
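+# e.g. version 1.10.0 -> (1 << 24) | (10 << 8) | 0 = 0x01000a00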
hex=$(printf '0x%08x' $(( (($M << 24)) | (($m << 8)) | $p )))
-sed -i "s/^#define NNFW_VERSION.*/#define NNFW_VERSION $hex/" ${nnfw_root}/runtime/onert/api/include/nnfw_version.h
+perl -pi -e "s/^#define NNFW_VERSION.*/#define NNFW_VERSION $hex/" ${nnfw_root}/runtime/onert/api/include/nnfw_version.h
-sed -i "s/versionName .*$/versionName \"$version\"/" ${nnfw_root}/runtime/contrib/android/api/build.gradle
+perl -pi -e "s/versionName .*$/versionName \"$version\"/" ${nnfw_root}/runtime/contrib/android/api/build.gradle